Example 1
import logging

import pymongo

# db_connect, fields_results, and fields_trans are module-level names
# defined elsewhere in this module.

def db_index(name, host):
    colls = db_connect(name, host)

    # index the sample_sets collection
    logging.info('indexing sample_sets collection')
    ss = colls['sample_sets']
    # create_index is a no-op when an identical index already exists
    ss.create_index('name')

    # index the results collection
    logging.info('indexing results collection')
    results = colls['results']
    results.create_index([('t_id', pymongo.ASCENDING),
                          ('ss_id', pymongo.ASCENDING)])
    results.create_index([('ss_id', pymongo.ASCENDING),
                          ('nes', pymongo.ASCENDING)])

    # create indexes for the merged collection:
    # a compound index on ss_id plus each other field in the reports
    client = pymongo.MongoClient(host)
    db = client[name]
    merged = db.merged
    for field in fields_results:
        if field != 'ss_id':
            merged.create_index([('ss_id', pymongo.ASCENDING),
                                 (field, pymongo.ASCENDING)])
            # three-field compound index used when a category filter is applied
            merged.create_index([('ss_id', pymongo.ASCENDING),
                                 ('category', pymongo.ASCENDING),
                                 (field, pymongo.ASCENDING)])
            # index supporting regex queries on nearest_gene_names
            merged.create_index([('ss_id', pymongo.ASCENDING),
                                 (field, pymongo.ASCENDING),
                                 ('nearest_gene_names', pymongo.ASCENDING)])
    # include loc_strand without mutating the module-level fields_trans list
    for field in fields_trans + ['loc_strand']:
        if field != 'category':
            merged.create_index([('ss_id', pymongo.ASCENDING),
                                 (field, pymongo.ASCENDING)])
            # three-field compound index used when a category filter is applied
            merged.create_index([('ss_id', pymongo.ASCENDING),
                                 ('category', pymongo.ASCENDING),
                                 (field, pymongo.ASCENDING)])
            # index supporting regex queries on nearest_gene_names
            merged.create_index([('ss_id', pymongo.ASCENDING),
                                 (field, pymongo.ASCENDING),
                                 ('nearest_gene_names', pymongo.ASCENDING)])
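
Both examples rely on a db_connect helper that is not shown. Below is a minimal sketch of what it plausibly looks like, assuming it simply maps collection names to collection handles; the return shape and the collection names are inferred from the call sites above, not confirmed by the source.

import pymongo

def db_connect(name, host):
    # Hypothetical reconstruction: open the named database and return its
    # collections in a dict keyed by name, matching how the callers unpack it.
    client = pymongo.MongoClient(host)
    db = client[name]
    return {
        'sample_sets': db.sample_sets,
        'results': db.results,
        'samples': db.samples,
        'transcripts': db.transcripts,
    }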
Example 2
import logging
import os
import subprocess

import ssea

# db_connect is defined elsewhere in this module.

def db_metadata_import(input_dir, name, host):
    logging.info('importing ssea data to %s database on mongo server: %s',
                 name, host)

    logging.debug("Importing samples file")
    _ssea_path = ssea.__path__[0]
    _merge_path = os.path.join(_ssea_path, 'utils/mongo_metadata_printJSON.py')
    p1 = subprocess.Popen(['python', _merge_path, input_dir, '-c'],
                          stdout=subprocess.PIPE)
    p2 = subprocess.Popen(
        ['mongoimport', '-c', 'samples', '--host', host, '-d', name],
        stdin=p1.stdout)
    p1.stdout.close()  # let p1 see a broken pipe if mongoimport exits early
    p1.wait()
    p2.wait()

    logging.info("Importing transcripts file")
    p1 = subprocess.Popen(['python', _merge_path, input_dir, '-r'],
                          stdout=subprocess.PIPE)
    p2 = subprocess.Popen(
        ['mongoimport', '-c', 'transcripts', '--host', host, '-d', name],
        stdin=p1.stdout)
    p1.stdout.close()
    p1.wait()
    p2.wait()

    sample_index_fields = [
        'library_id', 'tcga_disease_type', 'assembly_cohort',
        'fragment_length_mean', 'cohort', 'cancer_progression',
        'tcga_legacy_id'
    ]
    transcript_index_fields = [
        'category', 'nearest_gene_names', 'gene_id', 'locus', 'name'
    ]

    colls = db_connect(name, host)
    samples = colls['samples']
    logging.info('indexing samples collection')
    for field in sample_index_fields:
        samples.create_index(field)

    transcripts = colls['transcripts']
    logging.info('indexing transcripts collection')
    # index the transcript-specific fields
    for field in transcript_index_fields:
        transcripts.create_index(field)
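
A hedged usage sketch tying the two functions together, assuming both are importable from one module and a MongoDB server is reachable; the input directory, database name, and host below are placeholder values, not taken from the source.

import logging

logging.basicConfig(level=logging.INFO)

# Placeholder values: point input_dir at a real ssea output directory
# and host at a reachable mongod instance.
db_metadata_import('/path/to/ssea_output', 'ssea_db', 'localhost:27017')
db_index('ssea_db', 'localhost:27017')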