Example #1
0
def test00_from_mongo_to_mapping_long(pymongo_client):
    """Compare the mapping built from three test databases with the stored one.

    The expected mapping is read from
    ``resources/functional/expected/mapping.json`` under ``TEST_DIR``.
    """
    expected_path = os.path.join(
        TEST_DIR, 'resources', 'functional', 'expected', 'mapping.json')
    with open(expected_path) as expected_file:
        exp_mapping = json.load(expected_file)

    databases = ['test_db', 'test_db1', 'test_db2']
    mongo_schema = extract_pymongo_client_schema(
        pymongo_client, database_names=databases)

    assert mongo_schema_to_mapping(mongo_schema) == exp_mapping
Example #2
0
def test00_from_mongo_to_mapping(pymongo_client):
    """Compare the mapping built from ``test_db.test_col`` with the stored one.

    The expected mapping is read from ``resources/input/mapping.json`` under
    ``TEST_DIR``.
    """
    expected_path = os.path.join(
        TEST_DIR, "resources", "input", "mapping.json")
    with open(expected_path) as expected_file:
        exp_mapping = json.load(expected_file)

    selection = {'database_names': 'test_db', 'collection_names': 'test_col'}
    mongo_schema = extract_pymongo_client_schema(pymongo_client, **selection)

    assert mongo_schema_to_mapping(mongo_schema) == exp_mapping
Example #3
0
def extract_schema(args):
    """ Main entry point function to extract schema.

    Opens a MongoDB client on ``args.host`` / ``args.port``, extracts the
    schema restricted to ``args.databases`` / ``args.collections`` and logs
    the elapsed time.

    :param args: object exposing ``host``, ``port``, ``databases`` and
        ``collections`` attributes (presumably an argparse namespace).
    :return: whatever ``extract_pymongo_client_schema`` returns.
    """
    start_time = time()
    logger.info('=== Start MongoDB schema analysis')
    client = pymongo.MongoClient(host=args.host, port=args.port)
    try:
        mongo_schema = extract_pymongo_client_schema(
            client,
            database_names=args.databases,
            collection_names=args.collections)
    finally:
        # Release the client's connections even if the extraction raises;
        # the original never closed the client.
        client.close()

    logger.info('--- MongoDB schema analysis took %.2f s', time() - start_time)
    return mongo_schema
def extract_schema(arg):
    """Extract the MongoDB schema described by a command-line option mapping.

    :param arg: mapping with the keys ``'--host'``, ``'--port'``,
        ``'--database'`` and ``'--collection'`` (presumably a docopt result;
        the port is converted with ``int``).
    :return: whatever ``extract_pymongo_client_schema`` returns.
    """
    start_time = time()
    logger.info('=== Start MongoDB schema analysis')
    client = pymongo.MongoClient(host=arg['--host'], port=int(arg['--port']))
    try:
        schema = extract_pymongo_client_schema(
            client,
            database_names=arg['--database'],
            collection_names=arg['--collection'])
    finally:
        # Release the client's connections even if the extraction raises;
        # the original never closed the client.
        client.close()

    # Lazy %-style arguments: the message is only formatted when the record
    # is actually emitted. The rendered text is unchanged.
    logger.info('--- MongoDB schema analysis took %.2f s', time() - start_time)
    return schema
Example #5
0
def extract_schema(args):
    """ Main entry point function to extract schema.

    Builds a MongoDB URI from ``args.host`` / ``args.port`` (adding
    ``args.user`` / ``args.password`` and the ``admin`` database when a user
    is given), extracts the schema restricted to ``args.databases`` /
    ``args.collections`` with ``args.query`` passed as ``filters``, and logs
    the elapsed time.

    :param args: object exposing ``host``, ``port``, ``user``, ``password``,
        ``databases``, ``collections`` and ``query`` attributes.
    :return: whatever ``extract_pymongo_client_schema`` returns.
    """
    try:
        from urllib.parse import quote_plus
    except ImportError:  # Python 2
        from urllib import quote_plus

    start_time = time()
    logger.info('=== Start MongoDB schema analysis')
    mongouri = 'mongodb://{}:{}/'.format(args.host, int(args.port))
    if args.user:
        # Credentials must be percent-escaped inside a MongoDB URI, otherwise
        # characters such as '@', ':', '/' or '%' corrupt the URI.
        # str() keeps the original stringification of non-string values.
        mongouri = 'mongodb://{}:{}@{}:{}/{}'.format(
            quote_plus(str(args.user)), quote_plus(str(args.password)),
            args.host, int(args.port), 'admin')
    client = pymongo.MongoClient(mongouri)
    try:
        mongo_schema = extract_pymongo_client_schema(
            client,
            database_names=args.databases,
            collection_names=args.collections,
            filters=args.query)
    finally:
        # Release the client's connections even if the extraction raises;
        # the original never closed the client.
        client.close()

    logger.info('--- MongoDB schema analysis took %.2f s', time() - start_time)
    return mongo_schema
Example #6
0
 def build(self):
     """Record the collections of ``self.db`` as Table and Column rows.

     For every collection name listed by the MongoDB database ``self.db``,
     the name is printed, its schema is extracted and converted to a
     mapping, and a ``Table`` row is fetched or created. When the mapping
     contains the collection, each mapped column except ``'pk'`` gets a
     ``Column`` row whose ``data_type`` is the mapped ``'type'`` and whose
     ``is_null`` is set to ``True``.
     """
     with pymongo.MongoClient(self.host, self.port) as client:
         mongo_db = client[self.db]
         for coll_name in mongo_db.list_collection_names():
             print(coll_name)
             mapping = mongo_schema_to_mapping(
                 extract_pymongo_client_schema(client, [self.db],
                                               [coll_name]))
             # The table row is fetched/created before the mapping check,
             # so it exists even for collections without a mapping.
             table, _ = Table.objects.get_or_create(
                 database=self.database, name=coll_name)
             if not (self.db in mapping and coll_name in mapping[self.db]):
                 continue
             for col_name, col_spec in mapping[self.db][coll_name].items():
                 if col_name == 'pk':
                     continue
                 column, _ = Column.objects.get_or_create(table=table,
                                                          name=col_name)
                 column.data_type = col_spec['type']
                 column.is_null = True
                 column.save()
Example #7
0
def extract_schema(args):
    """ Main entry point function to extract schema.

    Opens a MongoDB client on ``args.host`` / ``args.port`` (passing
    ``args.user`` / ``args.password`` only when a password is set), extracts
    the schema restricted to ``args.databases`` / ``args.collections`` with
    ``sample_size=args.size``, and logs the elapsed time.

    :param args: object exposing ``host``, ``port``, ``user``, ``password``,
        ``databases``, ``collections`` and ``size`` attributes.
    :return: whatever ``extract_pymongo_client_schema`` returns.
    """
    start_time = time()
    logger.info('=== Start MongoDB schema analysis')

    client_kwargs = {'host': args.host, 'port': args.port}
    if args.password:
        # Credentials are only passed when a password was supplied.
        client_kwargs.update(username=args.user, password=args.password)
    client = pymongo.MongoClient(**client_kwargs)

    try:
        mongo_schema = extract_pymongo_client_schema(
            client,
            database_names=args.databases,
            collection_names=args.collections,
            sample_size=args.size)
    finally:
        # Release the client's connections even if the extraction raises;
        # the original never closed the client.
        client.close()

    logger.info('--- MongoDB schema analysis took %.2f s', time() - start_time)
    return mongo_schema
# Alternative input file, kept for reference:
#with open('data/sample_labeled_list_woAmbi_92742_70138_191119.pkl', 'rb') as f:
# NOTE(review): pickle.load can execute arbitrary code from the file; only
# load pickles that come from a trusted source.
with open('data/labeled_list_woAmbi_92742_70138.pkl', 'rb') as f:
    samples = pickle.load(f)

#   Keep only the first entry of the loaded object and use it as lookup key.
#   NOTE(review): the previous comment said "Only use 10000 samples, ATM",
#   which does not match taking index 0 -- confirm the intent.
samples = samples[0]


#   presumably positions 5 and 6 hold the drug name and the cell line --
#   verify against the layout of the pickled records
find_drugname = samples[5]
find_cellline = samples[6]

#   Fetch the matching documents once. Cursor.count() is deprecated since
#   PyMongo 3.7 and removed in 4.0, so the number of matches is taken from
#   the materialised list instead.
matching_docs = list(
    collection.find({"drugname": find_drugname, "cellline": find_cellline}))
print('Num of selected docs : ', len(matching_docs))
#   Raises IndexError when nothing matches, as the original did.
selected_doc = matching_docs[0]


print(selected_doc['drugname'])
print(selected_doc['cellline'])
print(selected_doc['dosage'])
print(selected_doc['duration'])


#   Get the number of documents in the collection
print('length of docs : ', collection.count_documents({}))


#   Use pymongo-schema for schema extraction
schema = extract_pymongo_client_schema(client)
print(schema)

 def extractSchema(self):
     """Return the schema extracted from the client given by ``getDSClient()``."""
     return extract_pymongo_client_schema(getDSClient())