Code example #1
def aggregate_vties(aggregate_queue):
    bucketed_seqs = {'success': {}, 'noresult': []}
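    # Bucket successful alignments by (V ties, J ties, CDR3 length) so that
    # duplicate reads within a bucket can be merged by copy number.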
    for result in aggregate_queue:
        if result['status'] == 'success':
            alignment = result['alignment']
            bucket_key = (funcs.format_ties(alignment.v_gene),
                          funcs.format_ties(alignment.j_gene),
                          len(alignment.cdr3))

            bucket = bucketed_seqs['success'].setdefault(bucket_key, {})
            if alignment.sequence.sequence in bucket:
                bucket[alignment.sequence.sequence].sequence.copy_number += (
                    alignment.sequence.copy_number)
            else:
                bucket[alignment.sequence.sequence] = alignment
        elif result['status'] == 'noresult':
            bucketed_seqs['noresult'].append(result)
        elif result['status'] == 'error':
            logger.error(
                # 'reason' mirrors the error branch of aggregate_vdj below,
                # supplying the value the second {} placeholder expects
                'Unexpected error processing sequence {}\n\t{}'.format(
                    result['alignment'].sequence.seq_id, result['reason']))

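    # Keep only the unique alignments from each bucket.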
    bucketed_seqs['success'] = [
        b.values() for b in bucketed_seqs['success'].values()
    ]
    return bucketed_seqs
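
The bucketing pattern these aggregate_vties variants share, reduced to a self-contained sketch (a hedged illustration: every name below is a stand-in, not immunedb's API). Records are grouped by a key tuple, then records that collide within a bucket are merged:

from collections import defaultdict

def bucket_and_merge(records, key_func, merge_func):
    # Group records by key; merge records whose sequence already appears.
    buckets = defaultdict(dict)
    for rec in records:
        bucket = buckets[key_func(rec)]
        if rec['sequence'] in bucket:
            merge_func(bucket[rec['sequence']], rec)
        else:
            bucket[rec['sequence']] = rec
    return [list(b.values()) for b in buckets.values()]

# Usage: two reads sharing a key and a sequence collapse into one record
# whose copy count is the sum of both.
records = [
    {'v': 'IGHV1-2', 'j': 'IGHJ4', 'sequence': 'ACGT', 'copies': 3},
    {'v': 'IGHV1-2', 'j': 'IGHJ4', 'sequence': 'ACGT', 'copies': 2},
    {'v': 'IGHV3-7', 'j': 'IGHJ6', 'sequence': 'GGTT', 'copies': 1},
]
merged = bucket_and_merge(
    records,
    key_func=lambda r: (r['v'], r['j'], len(r['sequence'])),
    merge_func=lambda kept, dup: kept.update(
        copies=kept['copies'] + dup['copies']))
assert merged[0][0]['copies'] == 5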
Code example #2
def import_alignments(session, args):
    parse_funcs = {
        'airr': (parse_airr, preprocess_airr),
    }

    meta_fn = args.metadata if args.metadata else os.path.join(
        args.sample_dir, 'metadata.tsv')
    if not os.path.isfile(meta_fn):
        logger.error('Metadata file not found.')
        return
    with open(meta_fn, 'rU') as fh:
        try:
            metadata = parse_metadata(session, fh, args.warn_existing,
                                      args.warn_missing, args.sample_dir)
        except MetadataException as ex:
            logger.error(ex.message)
            return

    props = IdentificationProps(**args.__dict__)
    v_germlines = raw_germlines(args.v_germlines, 'v')
    j_germlines = raw_germlines(args.j_germlines, 'j')

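    # Create one sample per metadata row, then parse its input file.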
    for sample_name in sorted(metadata.keys()):
        sample = create_sample(session, metadata[sample_name])
        if sample:
            path = os.path.join(
                args.sample_dir, metadata[sample_name]['file_name'])
            with open(path) as fh:
                parse_file(fh, sample, session, parse_funcs[args.format][0],
                           props, v_germlines, j_germlines,
                           args.nproc,
                           preprocess_func=parse_funcs[args.format][1])
Code example #3
def aggregate_vties(aggregate_queue):
    bucketed_seqs = {
        'success': {},
        'noresult': []
    }
    for result in aggregate_queue:
        if result['status'] == 'success':
            alignment = result['alignment']
            bucket_key = (
                funcs.format_ties(alignment.v_gene),
                funcs.format_ties(alignment.j_gene),
                len(alignment.cdr3)
            )

            bucket = bucketed_seqs['success'].setdefault(bucket_key, {})
            if alignment.sequence.sequence in bucket:
                bucket[alignment.sequence.sequence].sequence.copy_number += (
                    alignment.sequence.copy_number
                )
            else:
                bucket[alignment.sequence.sequence] = alignment
        elif result['status'] == 'noresult':
            bucketed_seqs['noresult'].append(result)
        elif result['status'] == 'error':
            logger.error(
                # 'reason' mirrors the error branch of aggregate_vdj below,
                # supplying the value the second {} placeholder expects
                'Unexpected error processing sequence {}\n\t{}'.format(
                    result['alignment'].sequence.seq_id, result['reason']))

    bucketed_seqs['success'] = [
        b.values() for b in bucketed_seqs['success'].values()
    ]
    return bucketed_seqs
Code example #4
def run_import(session, args):
    v_germlines = VGermlines(args.v_germlines)
    j_germlines = JGermlines(args.j_germlines, args.upstream_of_cdr3,
                             args.anchor_len)

    meta_fn = args.metadata if args.metadata else os.path.join(
        args.sample_dir, 'metadata.tsv')

    if not os.path.isfile(meta_fn):
        logger.error('Metadata file not found.')
        return

    with open(meta_fn, 'rU') as fh:
        try:
            metadata = parse_metadata(session, fh, args.warn_existing,
                                      args.warn_missing, args.sample_dir)
        except MetadataException as ex:
            logger.error(ex.message)
            return

    props = IdentificationProps(**args.__dict__)
    for sample_name in sorted(metadata.keys()):
        sample = create_sample(session, metadata[sample_name])
        if sample:
            path = os.path.join(args.sample_dir,
                                metadata[sample_name]['file_name'])
            with open(path) as fh:
                read_file(session, args.format, fh, sample, v_germlines,
                          j_germlines, props)
Code example #5
def create_sample(session, metadata):
    study, new = funcs.get_or_create(
        session, Study, name=metadata['study_name'])

    if new:
        logger.info('Created new study "{}"'.format(study.name))
        session.commit()

    sample, new = funcs.get_or_create(
        session, Sample, name=metadata['sample_name'], study=study)
    if new:
        logger.info('Created new sample "{}"'.format(sample.name))
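        # Persist any non-required metadata fields as key/value rows.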
        for key, value in metadata.items():
            if key not in REQUIRED_FIELDS:
                session.add(SampleMetadata(
                    sample=sample,
                    key=key,
                    value=value
                ))

        subject, new = funcs.get_or_create(
            session, Subject, study=study,
            identifier=metadata['subject'])
        sample.subject = subject
        session.commit()
    else:
        logger.error(
            'Sample "{}" already exists'.format(metadata['sample_name']))
        return
    return sample
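
funcs.get_or_create appears throughout these examples; below is a minimal sketch of the usual SQLAlchemy idiom behind its (instance, created) contract (a hedged reconstruction, not necessarily immunedb's exact implementation):

def get_or_create(session, model, **kwargs):
    # Return (row, True) after creating it, or (row, False) when a row
    # matching the keyword filter already exists.
    instance = session.query(model).filter_by(**kwargs).first()
    if instance is not None:
        return instance, False
    instance = model(**kwargs)
    session.add(instance)
    return instance, True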
Code example #6
File: delimited.py  Project: wangdi2014/immunedb
def run_import(session, args, remaps=None):
    v_germlines = VGermlines(args.v_germlines)
    j_germlines = JGermlines(args.j_germlines, args.upstream_of_cdr3,
                             args.anchor_len, args.min_anchor_len)

    study, new = funcs.get_or_create(session, Study, name=args.study_name)

    if new:
        logger.info('Created new study "{}"'.format(study.name))
        session.commit()

    sample, new = funcs.get_or_create(session,
                                      Sample,
                                      name=args.sample_name,
                                      study=study)
    if new:
        sample.date = args.date
        logger.info('Created new sample "{}"'.format(sample.name))
        for key in ('subset', 'tissue', 'disease', 'lab', 'experimenter',
                    'ig_class', 'v_primer', 'j_primer'):
            setattr(sample, key, vars(args).get(key, None))
        subject, new = funcs.get_or_create(session,
                                           Subject,
                                           study=study,
                                           identifier=args.subject)
        sample.subject = subject
        session.commit()
    else:
        logger.error('Sample "{}" already exists'.format(args.sample_name))
        return

    with open(args.input_file) as fh:
        read_file(session, fh, sample, v_germlines, j_germlines, args, remaps)
Code example #7
def add_uniques(session,
                sample,
                vdjs,
                realign_len=None,
                realign_mut=None,
                min_similarity=0,
                max_vties=50,
                trim_to=None,
                max_padding=None):
    bucketed_seqs = OrderedDict()
    vdjs = sorted(vdjs, key=lambda v: v.ids[0])
    for vdj in funcs.periodic_commit(session, vdjs):
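        # Apply alignment quality filters; failures are stored as no-results.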
        try:
            if realign_len is not None:
                vdj.align_to_germline(realign_len, realign_mut, trim_to)
            if vdj.v_match / float(vdj.v_length) < min_similarity:
                raise AlignmentException('V-identity too low {} < {}'.format(
                    vdj.v_match / float(vdj.v_length), min_similarity))
            if len(vdj.v_gene) > max_vties:
                raise AlignmentException('Too many V-ties {} > {}'.format(
                    len(vdj.v_gene), max_vties))
            if max_padding is not None and vdj.pad_length > max_padding:
                raise AlignmentException('Too much padding {} (max {})'.format(
                    vdj.pad_length, max_padding))
            bucket_key = (funcs.format_ties(vdj.v_gene,
                                            vdj.v_germlines.prefix,
                                            strip_alleles=True),
                          funcs.format_ties(vdj.j_gene,
                                            vdj.j_germlines.prefix,
                                            strip_alleles=True), len(vdj.cdr3))
            if bucket_key not in bucketed_seqs:
                bucketed_seqs[bucket_key] = {}
            bucket = bucketed_seqs[bucket_key]

            if vdj.sequence in bucket:
                bucket[vdj.sequence].ids += vdj.ids
            else:
                bucket[vdj.sequence] = vdj
        except AlignmentException as e:
            add_as_noresult(session, vdj, sample, str(e))
        except Exception:
            logger.error('\tUnexpected error processing sequence '
                         '{}\n\t{}'.format(vdj.ids[0], traceback.format_exc()))

    # Collapse sequences that are the same except for Ns
    for bucket, sequences in funcs.periodic_commit(session,
                                                   bucketed_seqs.iteritems()):
        sequences = sorted(sequences.values(),
                           key=lambda s: (len(s.ids), s.ids[0]),
                           reverse=True)
        while len(sequences) > 0:
            larger = sequences.pop(0)
            for i in reversed(range(len(sequences))):
                smaller = sequences[i]

                if dnautils.equal(larger.sequence, smaller.sequence):
                    larger.ids += smaller.ids
                    del sequences[i]
            add_as_sequence(session, larger, sample)
    session.commit()
Code example #8
def run_import(session, args):
    v_germlines = VGermlines(args.v_germlines)
    j_germlines = JGermlines(args.j_germlines, args.upstream_of_cdr3,
                             args.anchor_len)

    meta_fn = args.metadata if args.metadata else os.path.join(
        args.sample_dir, 'metadata.tsv')

    if not os.path.isfile(meta_fn):
        logger.error('Metadata file not found.')
        return

    with open(meta_fn, 'rU') as fh:
        try:
            metadata = parse_metadata(session, fh, args.warn_existing,
                                      args.warn_missing, args.sample_dir)
        except MetadataException as ex:
            logger.error(ex.message)
            return

    props = IdentificationProps(**args.__dict__)
    for sample_name in sorted(metadata.keys()):
        sample = create_sample(session, metadata[sample_name])
        if sample:
            path = os.path.join(
                args.sample_dir, metadata[sample_name]['file_name'])
            with open(path) as fh:
                read_file(session, args.format, fh, sample, v_germlines,
                          j_germlines, props)
Code example #9
File: admin.py  Project: Danielli-Itai/PyBioImmunedb
def create(main_parser, args):
    if re.search(r'[^A-Za-z0-9_-]', args.db_name) is not None:
        main_parser.error('Database name must only contain letters, numbers, '
                          'dashes and underscores.')

    try:
        conn = _get_root_connection(args.db_host, args.admin_user,
                                    args.admin_pass)

        db_user = args.db_user or args.db_name
        if args.db_pass:
            db_pass = args.db_pass
        else:
            db_pass = ''.join(
                random.choice(string.ascii_uppercase + string.ascii_lowercase +
                              string.digits) for _ in range(10))

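        # Create the MySQL user and database, then grant privileges.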
        with conn.cursor() as cursor:
            logger.info('Creating user "{}"'.format(db_user))
            existing_password = _create_user_if_not_exists(conn, '%', db_user,
                                                           db_pass)
            if existing_password is not None:
                if not args.db_pass:
                    logger.warning(
                        'User {} already exists.  To generate the '
                        'configuration file, you must enter its '
                        'password.'.format(db_user)
                    )
                    db_pass = _get_user_pass(conn, args.db_host, db_user,
                                             existing_password)
                else:
                    db_pass = args.db_pass

            logger.info('Creating database "{}"'.format(args.db_name))
            cursor.execute('CREATE DATABASE {}'.format(args.db_name))

            cursor.execute(
                'GRANT ALL PRIVILEGES ON {}.* TO \'{}\'@\'%\''.format(
                    args.db_name, db_user))

        config_path = os.path.join(args.config_dir, '{}.json'.format(
            args.db_name))
        logger.info('Creating config at {}'.format(config_path))
        with open(config_path, 'w+') as fh:
            json.dump({
                'host': args.db_host,
                'database': args.db_name,
                'username': db_user,
                'password': db_pass
            }, fh, sort_keys=True, indent=4, separators=(',', ': '))

        logger.info('Initializing tables')
        config.init_db(config_path)
        logger.info('Success!')
        return True
    except Exception as e:
        logger.error(e)
        return False
Code example #10
def _get_user_pass(conn, host, user, existing_password):
    with conn.cursor() as cursor:
        while True:
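            # Prompt until the entered password hashes to the stored value.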
            db_pass = getpass.getpass()
            cursor.execute('SELECT PASSWORD(%s) as password', db_pass)
            if cursor.fetchone()['password'] != existing_password:
                logger.error('Password does not match.')
            else:
                logger.info('Correct password')
                return db_pass
Code example #11
File: __init__.py  Project: wangdi2014/immunedb
def add_uniques(session,
                sample,
                alignments,
                props,
                aligner,
                realign_len=None,
                realign_mut=None):
    bucketed_seqs = OrderedDict()
    alignments = sorted(alignments, key=lambda v: v.sequence.ids[0])
    for alignment in funcs.periodic_commit(session, alignments):
        try:
            if realign_len is not None:
                aligner.align_to_germline(alignment, realign_len, realign_mut)
                if props.trim_to:
                    alignment.trim_to(props.trim_to)

            props.validate(alignment)
            bucket_key = (funcs.format_ties(alignment.v_gene),
                          funcs.format_ties(alignment.j_gene),
                          len(alignment.cdr3))

            if bucket_key not in bucketed_seqs:
                bucketed_seqs[bucket_key] = {}
            bucket = bucketed_seqs[bucket_key]

            if alignment.sequence.sequence in bucket:
                bucket[alignment.sequence.sequence].sequence.ids += (
                    alignment.sequence.ids)
            else:
                bucket[alignment.sequence.sequence] = alignment
        except AlignmentException as e:
            add_as_noresult(session, alignment.sequence, sample, str(e))
        except Exception:
            logger.error('\tUnexpected error processing sequence '
                         '{}\n\t{}'.format(alignment.sequence.ids[0],
                                           traceback.format_exc()))

    # Collapse sequences that are the same except for Ns
    for bucket, sequences in funcs.periodic_commit(session,
                                                   bucketed_seqs.iteritems()):
        sequences = sorted(sequences.values(),
                           key=lambda s:
                           (len(s.sequence.ids), s.sequence.ids[0]),
                           reverse=True)
        while len(sequences) > 0:
            larger = sequences.pop(0)
            for i in reversed(range(len(sequences))):
                smaller = sequences[i]

                if dnautils.equal(larger.sequence.sequence,
                                  smaller.sequence.sequence):
                    larger.sequence.ids += smaller.sequence.ids
                    del sequences[i]
            add_as_sequence(session, larger, sample)
    session.commit()
Code example #12
File: metadata.py  Project: wangdi2014/immunedb
def parse_metadata(session, fh, warn_existing, path):
    reader = csv.DictReader(fh, delimiter='\t')
    provided_fields = set(reader.fieldnames)
    missing_fields = set(REQUIRED_FIELDS) - provided_fields
    if len(missing_fields) > 0:
        raise MetadataException(
            'Metadata is missing the following headers: {}'.format(
                ','.join(missing_fields)))
    unknown_fields = provided_fields - (set(REQUIRED_FIELDS).union(
        set(OPTIONAL_FIELDS)))
    if len(unknown_fields) > 0:
        logger.warning('Ignoring unknown headers in metadata: {}'.format(
            ','.join(unknown_fields)))

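    # Validate each row and index it by sample name.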
    metadata = {}
    for row in reader:
        row = {
            k: v
            for k, v in row.iteritems() if v is not None and len(v) > 0
        }
        if len(row) == 0:
            continue
        check_populated(row)
        # Check if the sample name is unique
        if row['sample_name'] in metadata:
            logger.error('Duplicate sample name {} in metadata.'.format(
                row['sample_name']))

        # Check if a sample with the same name is in the database
        sample_in_db = session.query(Sample).filter(
            Sample.name == row['sample_name'],
            exists().where(Sequence.sample_id == Sample.id)).first()
        if sample_in_db:
            message = 'Sample {} already exists. {}'.format(
                row['sample_name'],
                'Skipping.' if warn_existing else 'Cannot continue.')
            if warn_existing:
                logger.warning(message)
                continue
            else:
                raise MetadataException(message)

        # Check if specified file exists
        if not os.path.isfile(os.path.join(path, row['file_name'])):
            raise MetadataException(
                'File {} for sample {} does not exist'.format(
                    row['file_name'], row['sample_name']))

        metadata[row['sample_name']] = row

    return metadata
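
For orientation, here is a minimal metadata.tsv of the shape parse_metadata expects. The columns are inferred from the keys these examples actually read (sample_name, study_name, subject, file_name); the real REQUIRED_FIELDS may demand more, and all values are made up:

sample_name	study_name	subject	file_name
day0_PBMC	StudyA	subject1	day0_PBMC.fasta
day7_PBMC	StudyA	subject1	day7_PBMC.fasta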
Code example #13
File: modify.py  Project: Danielli-Itai/PyBioImmunedb
def combine_samples(session, args):
    groups = {}

    subjects = set()
    for meta in session.query(SampleMetadata).filter(
            SampleMetadata.key == args.combine_field):
        groups.setdefault(meta.value, set()).add(meta.sample)
        subjects.add(meta.sample.subject)

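    # Refuse to combine samples that span more than one subject.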
    for group_id, samples in groups.items():
        group_subs = set(s.subject for s in samples)
        if len(group_subs) > 1:
            logger.error('Cannot combine samples across subjects '
                         '(group "{}" has {} subjects)'.format(
                             group_id, len(group_subs)))
            sys.exit(1)

    for subject in subjects:
        subject.reset()

    for group_id, samples in groups.items():
        all_sample_ids = set(s.id for s in samples)
        final_sample_id = min(all_sample_ids)
        logger.info('Combining {} samples into new sample "{}" (ID {})'.format(
            len(samples), group_id, final_sample_id))
        session.query(Sequence).filter(
            Sequence.sample_id.in_(all_sample_ids)).update(
                {
                    Sequence.sample_id: final_sample_id,
                },
                synchronize_session=False)

        logger.info('Updating sample name and deleting empty samples')
        # collapse to one sample
        final_sample = session.query(Sample).get(final_sample_id)
        final_sample.name = group_id
        remove_duplicates(session, final_sample)

        logger.info('Moving noresults')
        session.query(NoResult).filter(
            NoResult.sample_id.in_(all_sample_ids)).update(
                {'sample_id': final_sample_id}, synchronize_session=False)

        # delete the now-empty samples
        session.query(Sample).filter(
            Sample.id.in_(all_sample_ids - set([final_sample_id]))).delete(
                synchronize_session=False)

    session.commit()
    logger.info('Sequences successfully collapsed: please re-run '
                'immunedb_collapse and later pipeline steps.')
Code example #14
def run_rest_service(session_maker, args):
    if args.rollbar_token:
        if not ROLLBAR_SUPPORT:
            logger.error('Rollbar is not installed')
            return
        rbr = RollbarBottleReporter(access_token=args.rollbar_token,
                                    environment=args.rollbar_env)
        bottle.install(rbr)

    app.config['session_maker'] = session_maker
    app.config['allow_shutdown'] = args.allow_shutdown
    if args.debug:
        app.catchall = False
    app.run(host='0.0.0.0', port=args.port, server='gevent', debug=args.debug)
Code example #15
def run_identify(session, args):
    mod_log.make_mod('identification',
                     session=session,
                     commit=True,
                     info=vars(args))
    session.close()
    # Load the germlines from files
    v_germlines = VGermlines(args.v_germlines)
    j_germlines = JGermlines(args.j_germlines, args.upstream_of_cdr3,
                             args.anchor_len, args.min_anchor_len)
    tasks = concurrent.TaskQueue()

    # If metadata is not specified, assume it is "metadata.tsv" in the
    # directory
    meta_fn = args.metadata if args.metadata else os.path.join(
        args.sample_dir, 'metadata.tsv')

    # Verify the metadata file exists
    if not os.path.isfile(meta_fn):
        logger.error('Metadata file not found.')
        return

    with open(meta_fn, 'rU') as fh:
        try:
            metadata = parse_metadata(session, fh, args.warn_existing,
                                      args.sample_dir)
        except MetadataException as ex:
            logger.error(ex.message)
            return

    # Create the tasks for each file
    for sample_name in sorted(metadata.keys()):
        tasks.add_task({
            'path':
            os.path.join(args.sample_dir, metadata[sample_name]['file_name']),
            'meta':
            metadata[sample_name]
        })

    props = IdentificationProps(**args.__dict__)
    lock = mp.Lock()
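    # Start one worker per process, each with its own database session.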
    for i in range(0, min(args.nproc, tasks.num_tasks())):
        worker_session = config.init_db(args.db_config)
        tasks.add_worker(
            IdentificationWorker(worker_session, v_germlines, j_germlines,
                                 props, lock))

    tasks.start()
Code example #16
def delete(main_parser, args):
    try:
        with open(args.db_config) as fh:
            db_config = json.load(fh)
        conn = _get_root_connection(db_config['host'], args.admin_user,
                                    args.admin_pass)
        with conn.cursor() as cursor:
            logger.info('Deleting database {}'.format(db_config['database']))
            cursor.execute('DROP DATABASE `{}`'.format(db_config['database']))
            if args.delete_user:
                logger.info('Deleting user {}'.format(db_config['username']))
                cursor.execute('DROP USER `{}`'.format(db_config['username']))
        return True
    except Exception as e:
        logger.error(e)
        return False
Code example #17
File: __init__.py  Project: arosenfeld/immunedb
    def do_task(self, clone_id):
        clone_inst = self.session.query(Clone).filter(
            Clone.id == clone_id).first()
        if not clone_inst:
            return

        self.info('Running clone {}'.format(clone_inst.id))

        sequences = self.session.query(
            Sequence
        ).join(SequenceCollapse).filter(
            Sequence.clone_id == clone_id,
            SequenceCollapse.copy_number_in_subject >= self.min_seq_copies,
            SequenceCollapse.samples_in_subject >= self.min_seq_samples,
        )

        if self.exclude_stops:
            sequences = sequences.filter(Sequence.stop == 0)

        sequences = sequences.order_by(Sequence.v_length)

        try:
            tree = self.get_tree(clone_inst, sequences)
            if not tree:
                logger.warning('No sequences to make tree for clone {}'.format(
                    clone_id))
                return
        except Exception as e:
            logger.error('Error running clone {}: {}'.format(clone_id, e))
            return

        for node_id, node in enumerate(tree.traverse()):
            node.add_feature('node_id', node_id)
        final = {
            'info': {
                'min_mut_copies': self.min_mut_copies,
                'min_mut_samples': self.min_mut_samples,
                'min_seq_copies': self.min_seq_copies,
                'min_seq_samples': self.min_seq_samples,
                'exclude_stops': self.exclude_stops,
                'full_seq': self.full_seq,
            },
            'tree': tree_as_dict(tree)
        }
        clone_inst.tree = json.dumps(final)
        self.session.add(clone_inst)
        self.session.commit()
Code example #18
    def do_task(self, clone_id):
        clone_inst = self.session.query(Clone).filter(
            Clone.id == clone_id).first()
        if not clone_inst:
            return

        self.info('Running clone {}'.format(clone_inst.id))

        sequences = self.session.query(Sequence).join(SequenceCollapse).filter(
            Sequence.clone_id == clone_id,
            SequenceCollapse.copy_number_in_subject >= self.min_seq_copies,
            SequenceCollapse.samples_in_subject >= self.min_seq_samples,
        )

        if self.exclude_stops:
            sequences = sequences.filter(Sequence.stop == 0)

        sequences = sequences.order_by(Sequence.v_length)

        try:
            tree = self.get_tree(clone_inst, sequences)
            if not tree:
                logger.warning(
                    'No sequences to make tree for clone {}'.format(clone_id))
                return
        except Exception as e:
            logger.error('Error running clone {}: {}'.format(clone_id, e))
            return

        for node_id, node in enumerate(tree.traverse()):
            node.add_feature('node_id', node_id)
        final = {
            'info': {
                'min_mut_copies': self.min_mut_copies,
                'min_mut_samples': self.min_mut_samples,
                'min_seq_copies': self.min_seq_copies,
                'min_seq_samples': self.min_seq_samples,
                'exclude_stops': self.exclude_stops,
                'full_seq': self.full_seq,
            },
            'tree': tree_as_dict(tree)
        }
        clone_inst.tree = json.dumps(final)
        self.session.add(clone_inst)
        self.session.commit()
Code example #19
def run_rest_service(session_maker, args):
    if args.rollbar_token:
        if not ROLLBAR_SUPPORT:
            logger.error('Rollbar is not installed')
            return
        rbr = RollbarBottleReporter(access_token=args.rollbar_token,
                                    environment=args.rollbar_env)
        bottle.install(rbr)

    app = create_app(session_maker, args.allow_shutdown)
    if args.debug:
        app.catchall = False
    app.run(host='0.0.0.0',
            port=args.port,
            server=args.server,
            debug=args.debug,
            worker_class='eventlet',
            timeout=0)
Code example #20
def aggregate_vdj(aggregate_queue):
    alignments = {'success': {}, 'noresult': []}
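    # Merge duplicate reads keyed by raw sequence, summing copy numbers.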
    for result in aggregate_queue:
        if result['status'] == 'success':
            alignment = result['alignment']
            seq_key = alignment.sequence.sequence
            if seq_key in alignments['success']:
                alignments['success'][seq_key].sequence.copy_number += (
                    alignment.sequence.copy_number)
            else:
                alignments['success'][seq_key] = alignment
        elif result['status'] == 'noresult':
            alignments['noresult'].append(result)
        elif result['status'] == 'error':
            logger.error(
                'Unexpected error processing sequence {}\n\t{}'.format(
                    result['vdj'].seq_id, result['reason']))
    alignments['success'] = alignments['success'].values()
    return alignments
Code example #21
def run_identify(session, args):
    mod_log.make_mod('identification', session=session, commit=True,
                     info=vars(args))
    # Load the germlines from files
    v_germlines = VGermlines(args.v_germlines,
                             ties=args.ties and not args.genotyping)
    j_germlines = JGermlines(args.j_germlines, args.upstream_of_cdr3,
                             args.anchor_len, args.min_anchor_len,
                             ties=args.ties and not args.genotyping)

    # If metadata is not specified, assume it is "metadata.tsv" in the
    # directory
    meta_fn = args.metadata if args.metadata else os.path.join(
        args.sample_dir, 'metadata.tsv')

    # Verify the metadata file exists
    if not os.path.isfile(meta_fn):
        logger.error('Metadata file not found.')
        sys.exit(-1)

    with open(meta_fn, 'rU') as fh:
        try:
            metadata = parse_metadata(session, fh, args.warn_existing,
                                      args.warn_missing, args.sample_dir)
        except MetadataException as ex:
            logger.error(ex)
            sys.exit(-1)

    session.close()
    # Create the tasks for each file
    props = IdentificationProps(**args.__dict__)
    for sample_name in sorted(metadata.keys()):
        process_sample(
            args.db_config, v_germlines, j_germlines,
            os.path.join(
                args.sample_dir,
                metadata[sample_name]['file_name']
            ),
            metadata[sample_name],
            props,
            args.nproc
        )
Code example #22
def run_identify(session, args):
    mod_log.make_mod('identification', session=session, commit=True,
                     info=vars(args))
    # Load the germlines from files
    v_germlines = VGermlines(args.v_germlines, no_ties=args.genotyping)
    j_germlines = JGermlines(args.j_germlines, args.upstream_of_cdr3,
                             args.anchor_len, args.min_anchor_len,
                             no_ties=args.genotyping)

    # If metadata is not specified, assume it is "metadata.tsv" in the
    # directory
    meta_fn = args.metadata if args.metadata else os.path.join(
        args.sample_dir, 'metadata.tsv')

    # Verify the metadata file exists
    if not os.path.isfile(meta_fn):
        logger.error('Metadata file not found.')
        sys.exit(-1)

    with open(meta_fn, 'rU') as fh:
        try:
            metadata = parse_metadata(session, fh, args.warn_existing,
                                      args.warn_missing, args.sample_dir)
        except MetadataException as ex:
            logger.error(ex)
            sys.exit(-1)

    session.close()
    # Create the tasks for each file
    props = IdentificationProps(**args.__dict__)
    for sample_name in sorted(metadata.keys()):
        process_sample(
            args.db_config, v_germlines, j_germlines,
            os.path.join(
                args.sample_dir,
                metadata[sample_name]['file_name']
            ),
            metadata[sample_name],
            props,
            args.nproc
        )
Code example #23
    def do_task(self, clone_id):
        clone_inst = self._session.query(Clone).filter(
            Clone.id == clone_id).first()
        if clone_inst is None:
            return

        self.info('Running clone {}'.format(clone_inst.id))

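        # Select the clone's sequences that pass the copy-number cutoff.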
        sequences = self._session.query(
            Sequence
        ).join(SequenceCollapse).filter(
            Sequence.clone_id == clone_id,
            SequenceCollapse.copy_number_in_subject > self._min_seq_copies
        )

        if self._exclude_stops:
            sequences = sequences.filter(Sequence.stop == 0)

        sequences = sequences.order_by(Sequence.v_length)

        try:
            tree = PhylogeneticTree(clone_inst.consensus_germline, sequences)
            tree.run(self._session, self._tree_prog)
        except Exception as e:
            logger.error('Error running clone {}: {}'.format(clone_id, e))
            return

        final = {
            'info': {
                'min_count': self._min_count,
                'min_samples': self._min_samples,
                'min_seq_copies': self._min_seq_copies,
                'exclude_stops': self._exclude_stops
            },
            'tree': tree_as_dict(tree.tree)
        }
        clone_inst.tree = json.dumps(final)
        self._session.add(clone_inst)
        self._session.commit()
Code example #24
def aggregate_vdj(aggregate_queue):
    alignments = {
        'success': {},
        'noresult': []
    }
    for result in aggregate_queue:
        if result['status'] == 'success':
            alignment = result['alignment']
            seq_key = alignment.sequence.sequence
            if seq_key in alignments['success']:
                alignments['success'][seq_key].sequence.copy_number += (
                    alignment.sequence.copy_number)
            else:
                alignments['success'][seq_key] = alignment
        elif result['status'] == 'noresult':
            alignments['noresult'].append(result)
        elif result['status'] == 'error':
            logger.error(
                'Unexpected error processing sequence {}\n\t{}'.format(
                    result['vdj'].seq_id, result['reason']))
    alignments['success'] = alignments['success'].values()
    return alignments
Code example #25
def combine_samples(session, args):
    groups = {}

    for meta in session.query(SampleMetadata).filter(
            SampleMetadata.key == args.combine_field):
        groups.setdefault(meta.value, set()).add(meta.sample_id)
    all_subjects = set()
    for group_id, samples in groups.items():
        group_subs = session.query(Sample.subject_id).filter(
            Sample.id.in_(samples)
        ).group_by(Sample.subject_id)
        group_subs = [s.subject_id for s in group_subs]
        all_subjects.update(set(group_subs))
        if len(group_subs) > 1:
            logger.error('Cannot combine samples across subjects '
                         '(group "{}" has {} subjects)'.format(
                             group_id, len(group_subs)))
            sys.exit(1)

    all_samples = [s.id for s in session.query(Sample.id).filter(
        Sample.subject_id.in_(all_subjects))]

    logger.info('Resetting information for {} subjects ({} samples)'.format(
        len(all_subjects), len(all_samples)))
    logger.info('   Resetting collapsing')
    session.query(SequenceCollapse).filter(
        SequenceCollapse.sample_id.in_(all_samples)
    ).delete(synchronize_session=False)
    logger.info('   Resetting clones')
    session.query(Clone).filter(
        Clone.subject_id.in_(all_subjects)
    ).delete(synchronize_session=False)
    logger.info('   Resetting sample statistics')
    session.query(SampleStats).filter(
        SampleStats.sample_id.in_(all_samples)
    ).delete(synchronize_session=False)

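    # Reassign each group's sequences to the group's lowest sample ID.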
    for group_id, samples in groups.items():
        final_sample_id = min(samples)
        logger.info('Combining {} samples into new sample "{}" (ID {})'.format(
            len(samples), group_id, final_sample_id))
        session.query(Sequence).filter(
            Sequence.sample_id.in_(samples)
        ).update({
            Sequence.sample_id: final_sample_id,
        }, synchronize_session=False)

        logger.info('Updating sample name and deleting empty samples')
        # collapse to one sample
        final_sample = session.query(Sample).get(final_sample_id)
        final_sample.name = group_id
        remove_duplicates(session, final_sample)

        logger.info('Moving noresults')
        session.query(NoResult).filter(
            NoResult.sample_id.in_(samples)
        ).update({
            'sample_id': final_sample_id
        }, synchronize_session=False)

        # delete the now-empty samples
        session.query(Sample).filter(
            Sample.id.in_(samples - set([final_sample_id]))
        ).delete(synchronize_session=False)

    session.commit()
    logger.info('Sequences successfully collapsed: please re-run '
                'immunedb_collapse and later pipeline steps.')
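
The merge above reduces to one invariant: the smallest sample ID in each group survives and every child row is repointed at it. A toy sketch with plain dicts (no SQLAlchemy; the data is made up):

groups = {'tissue_A': {4, 7, 9}}
sequences = [{'seq_id': 1, 'sample_id': 7}, {'seq_id': 2, 'sample_id': 9}]

for group_id, sample_ids in groups.items():
    survivor = min(sample_ids)  # lowest ID wins, as in combine_samples
    for seq in sequences:
        if seq['sample_id'] in sample_ids:
            seq['sample_id'] = survivor

assert all(seq['sample_id'] == 4 for seq in sequences)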
Code example #26
File: identify.py  Project: rasteh/immunedb
def run_identify(session, args):
    mod_log.make_mod('identification',
                     session=session,
                     commit=True,
                     info=vars(args))
    session.close()
    # Load the germlines from files
    v_germlines = VGermlines(args.v_germlines)
    j_germlines = JGermlines(args.j_germlines, args.upstream_of_cdr3,
                             args.anchor_len, args.min_anchor_len)
    tasks = concurrent.TaskQueue()

    sample_names = set([])
    fail = False
    for directory in args.sample_dirs:
        # If metadata is not specified, assume it is "metadata.json" in the
        # directory
        if args.metadata is None:
            meta_fn = os.path.join(directory, 'metadata.json')
        else:
            meta_fn = args.metadata

        # Verify the metadata file exists
        if not os.path.isfile(meta_fn):
            logger.error('Metadata file not found.')
            return

        with open(meta_fn) as fh:
            metadata = json.load(fh)

        # Create the tasks for each file
        for fn in sorted(metadata.keys()):
            if fn == 'all':
                continue
            meta = SampleMetadata(
                metadata[fn], metadata['all'] if 'all' in metadata else None)
            if session.query(Sample).filter(
                    Sample.name == meta.get('sample_name'),
                    exists().where(
                        Sequence.sample_id == Sample.id)).first() is not None:
                log_f = logger.warning if args.warn_existing else logger.error
                log_f('Sample {} already exists. {}'.format(
                    meta.get('sample_name'),
                    'Skipping.' if args.warn_existing else 'Cannot continue.'))
                fail = True
            elif meta.get('sample_name') in sample_names:
                logger.error(
                    'Sample {} exists more than once in metadata.'.format(
                        meta.get('sample_name')))
                return
            else:
                tasks.add_task({'path': directory, 'fn': fn, 'meta': meta})
                sample_names.add(meta.get('sample_name'))

        if fail and not args.warn_existing:
            logger.error('Encountered errors.  Not running any identification.'
                         ' To skip samples that are already in the database '
                         'use --warn-existing.')
            return

    lock = mp.Lock()
    for i in range(0, min(args.nproc, tasks.num_tasks())):
        worker_session = config.init_db(args.db_config)
        tasks.add_worker(
            IdentificationWorker(worker_session, v_germlines, j_germlines,
                                 args.trim_to, args.max_padding,
                                 args.max_vties,
                                 args.min_similarity / float(100), lock))

    tasks.start()