Ejemplo n.º 1
0
def populated_db(chanjo_db, exon_lines):
    """Fill ``chanjo_db`` with linked elements and two samples, then yield it.

    The exon lines are materialised into a list because they are read three
    times: once by ``link_elements`` and once per ``load_transcripts`` call.
    Both samples ("sample" and "sample2") share the group id "group".

    Presumably registered as a pytest fixture by a decorator outside this
    view -- confirm against the full file.
    """
    lines = list(exon_lines)

    # Add the models produced by linking the exon lines.
    linked = link_elements(lines)
    chanjo_db.add(*linked.models)

    # Both samples are loaded before any of their records are added.
    loaded_samples = [
        load_transcripts(lines, sample_id=sample_id, group_id="group")
        for sample_id in ("sample", "sample2")
    ]
    for loaded in loaded_samples:
        chanjo_db.add(loaded.sample)
        chanjo_db.add(*loaded.models)

    chanjo_db.save()
    yield chanjo_db
Ejemplo n.º 2
0
def populated_db(chanjo_db, exon_lines):
    """Yield ``chanjo_db`` after saving linked elements and two samples.

    ``exon_lines`` is copied into a list up front since it is passed to
    ``link_elements`` and then to ``load_transcripts`` twice.
    """
    cached_lines = list(exon_lines)

    element_result = link_elements(cached_lines)
    chanjo_db.add(*element_result.models)

    # Load both samples first, then add each sample followed by its models.
    first = load_transcripts(cached_lines, sample_id='sample',
                             group_id='group')
    second = load_transcripts(cached_lines, sample_id='sample2',
                              group_id='group')
    for sample_result in (first, second):
        chanjo_db.add(sample_result.sample)
        chanjo_db.add(*sample_result.models)

    chanjo_db.save()
    yield chanjo_db
Ejemplo n.º 3
0
def test_load_transcripts(exon_lines):
    """Check ``load_transcripts`` with explicit ids and with ids from file."""
    # GIVEN sambamba depth output lines
    # WHEN loading transcript stats with explicit sample and group ids
    explicit = sambamba.load_transcripts(
        exon_lines, sample_id='sample', group_id='group')
    # THEN transcript models should be generated
    assert explicit.count == 10
    assert explicit.sample.id == 'sample'
    assert explicit.sample.group_id == 'group'
    first_model = list(explicit.models)[0]
    assert isinstance(first_model, TranscriptStat)

    # GIVEN no explicit sample id
    # WHEN loading transcript stats
    implicit = sambamba.load_transcripts(exon_lines)
    # THEN the sample id should be picked up from the file
    assert implicit.sample.id == 'ADM992A10'
Ejemplo n.º 4
0
def load(context, sample, group, name, group_name, threshold, bed_stream):
    """Load Sambamba output into the database for a sample."""
    # NOTE(review): docstring left verbatim -- presumably shown as CLI help
    # text by a decorator outside this view; confirm before rewording.
    chanjo_db = ChanjoDB(uri=context.obj['database'])
    source = os.path.abspath(bed_stream.name)

    loaded = load_transcripts(bed_stream, sample_id=sample, group_id=group,
                              source=source, threshold=threshold)

    # Attach the display names to the sample record before it is added.
    new_sample = loaded.sample
    new_sample.name = name
    new_sample.group_name = group_name

    try:
        chanjo_db.add(new_sample)
        progress = click.progressbar(loaded.models, length=loaded.count,
                                     label='loading transcripts')
        with progress as transcripts:
            for tx_model in transcripts:
                chanjo_db.add(tx_model)
        chanjo_db.save()
    except IntegrityError as error:
        # Undo the partial load and abort the command.
        LOG.error('sample already loaded, rolling back')
        LOG.debug(error.args[0])
        chanjo_db.session.rollback()
        context.abort()
Ejemplo n.º 5
0
def test_load_transcripts(exon_lines):
    """Verify transcript loading with explicit ids and with file-derived ids."""
    # GIVEN sambamba depth output lines
    explicit_ids = {'sample_id': 'sample', 'group_id': 'group'}
    # WHEN loading transcript stats
    with_ids = sambamba.load_transcripts(exon_lines, **explicit_ids)
    # THEN transcript models should be generated
    assert with_ids.count == 9
    assert with_ids.sample.id == 'sample'
    assert with_ids.sample.group_id == 'group'
    models = list(with_ids.models)
    assert isinstance(models[0], TranscriptStat)

    # GIVEN no explicit sample id
    # WHEN loading transcript stats
    without_ids = sambamba.load_transcripts(exon_lines)
    # THEN the sample id should be picked up from the file
    assert without_ids.sample.id == 'ADM992A10'
Ejemplo n.º 6
0
def popexist_db(existing_db, exon_lines):
    """Populate ``existing_db`` with linked elements and one sample, then yield it.

    ``exon_lines`` is read twice: first by ``link_elements`` and then by
    ``load_transcripts``. It is therefore materialised into a list up front;
    a one-shot iterator (e.g. an open file handle) would otherwise be
    exhausted by the first pass and the sample would be loaded from no lines.

    The sample is stored with id "sample" in group "group".
    """
    # Copy once so both passes below see every line.
    exon_lines = list(exon_lines)

    result = link_elements(exon_lines)
    existing_db.add(*result.models)

    result = load_transcripts(exon_lines, sample_id="sample", group_id="group")
    existing_db.add(result.sample)
    existing_db.add(*result.models)

    existing_db.save()
    yield existing_db
Ejemplo n.º 7
0
def popexist_db(existing_db, exon_lines):
    """Yield ``existing_db`` after saving linked elements and one sample.

    The exon lines are consumed by ``link_elements`` and again by
    ``load_transcripts``, so they are copied into a list first. Without the
    copy, a one-shot iterator would be empty by the time the transcripts are
    loaded.

    The sample is stored with id 'sample' in group 'group'.
    """
    # Materialise once; the input is iterated twice below.
    exon_lines = list(exon_lines)

    result = link_elements(exon_lines)
    existing_db.add(*result.models)

    result = load_transcripts(exon_lines, sample_id='sample', group_id='group')
    existing_db.add(result.sample)
    existing_db.add(*result.models)

    existing_db.save()
    yield existing_db
Ejemplo n.º 8
0
def test_load_transcripts_with_threshold(exon_lines):
    """Loading with a threshold should flag some transcripts as incomplete."""
    # GIVEN a cutoff for "complete" exons at 100x
    cutoff = 100
    # WHEN loading transcript stats
    loaded = sambamba.load_transcripts(exon_lines, threshold=cutoff)
    # THEN some transcripts will have incomplete exons linked
    incompletes = []
    for transcript in loaded.models:
        if transcript.incomplete_exons:
            incompletes.append(transcript)
    assert len(incompletes) > 0
Ejemplo n.º 9
0
def test_load_transcripts_with_threshold(exon_lines):
    """Expect at least one transcript with incomplete exons at a 100x cutoff."""
    # GIVEN a cutoff for "complete" exons at 100x
    threshold = 100
    # WHEN loading transcript stats
    stats = sambamba.load_transcripts(exon_lines, threshold=threshold)
    # THEN some transcripts will have incomplete exons linked
    incompletes = list(
        filter(lambda transcript: transcript.incomplete_exons, stats.models)
    )
    assert len(incompletes) > 0
Ejemplo n.º 10
0
Archivo: api.py Proyecto: mayabrandi/cg
    def upload(self, sample_id: str, sample_name: str, group_id: str, group_name: str,
               bed_stream: io.TextIOWrapper, threshold: int = 10):
        """Upload coverage for a sample.

        Args:
            sample_id: id stored on the sample record; also shown in the
                progress bar label.
            sample_name: display name assigned to the sample record.
            group_id: id of the group the sample belongs to.
            group_name: display name assigned to the sample's group.
            bed_stream: open text stream to load; its ``name`` is resolved to
                an absolute path and passed on as the source.
            threshold: cutoff forwarded to ``load_transcripts``. Defaults to
                10, the value that used to be hard-coded here.

        Raises:
            IntegrityError: re-raised after the session has been rolled back.
        """
        source = str(Path(bed_stream.name).absolute())
        result = load_transcripts(bed_stream, sample_id=sample_id, group_id=group_id,
                                  source=source, threshold=threshold)
        result.sample.name = sample_name
        result.sample.group_name = group_name

        try:
            self.add(result.sample)
            with click.progressbar(result.models, length=result.count,
                                   label=f"loading {sample_id}") as progress_bar:
                for tx_model in progress_bar:
                    self.add(tx_model)
            self.save()
        except IntegrityError:
            # Discard the partially added records, then let the caller see
            # the original exception with its traceback intact.
            self.session.rollback()
            raise
Ejemplo n.º 11
0
def load(context, sample, group, name, group_name, threshold, bed_stream):
    """Load Sambamba output into the database for a sample."""
    # NOTE(review): docstring kept as-is -- presumably used as command help
    # text by a decorator outside this view; confirm before changing it.
    chanjo_db = ChanjoDB(uri=context.obj['database'])
    source = os.path.abspath(bed_stream.name)

    load_options = {
        'sample_id': sample,
        'group_id': group,
        'source': source,
        'threshold': threshold,
    }
    outcome = load_transcripts(bed_stream, **load_options)

    # Set the display names on the sample record before adding it.
    outcome.sample.name = name
    outcome.sample.group_name = group_name

    try:
        chanjo_db.add(outcome.sample)
        with click.progressbar(
            outcome.models,
            length=outcome.count,
            label='loading transcripts',
        ) as tx_models:
            for tx_model in tx_models:
                chanjo_db.add(tx_model)
        chanjo_db.save()
    except IntegrityError as error:
        # Roll back the partial load and abort the command.
        LOG.error('sample already loaded, rolling back')
        LOG.debug(error.args[0])
        chanjo_db.session.rollback()
        context.abort()