Example #1
def divide_researchers_into_2groups():
    col_author = connectTable("qiuzh", "mag_researchers0810")
    col1 = connectTable("qiuzh", "researchers0810_trainingset")
    col2 = connectTable('qiuzh', "researchers0810_testset")
    opt1 = []
    opt2 = []
    count = 0
    cursor = col_author.find(no_cursor_timeout=True)
    for researcher in cursor:
        count += 1
        if researcher["first_year"] <= 1996:
            opt1.append(pymongo.InsertOne(researcher))
        else:
            opt2.append(pymongo.InsertOne(researcher))

        if count % 10000 == 0:
            print("Processed (x10000):", count / 10000, flush=True)
            if opt1:
                col1.bulk_write(opt1, ordered=False)
                print("Written:", len(opt1), flush=True)
            if opt2:
                col2.bulk_write(opt2, ordered=False)
                print("Written:", len(opt2), flush=True)
            opt1 = []
            opt2 = []
    if opt1:
        col1.bulk_write(opt1, ordered=False)
        print("Wrote remaining:", len(opt1), flush=True)
    if opt2:
        col2.bulk_write(opt2, ordered=False)
        print("Wrote remaining:", len(opt2), flush=True)
    cursor.close()
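Several of these snippets (Examples 1, 23, 24, 26, 27 and 28) rely on a `connectTable(db, collection)` helper that is not shown. A minimal sketch, assuming a MongoDB server on the default localhost port and no authentication, might look like this:

import pymongo

def connectTable(db_name, collection_name, uri="mongodb://localhost:27017"):
    # Hypothetical stand-in for the connectTable helper used in these examples:
    # open a client and return the requested collection. The real helper
    # presumably reuses a shared client and reads credentials from configuration.
    client = pymongo.MongoClient(uri)
    return client[db_name][collection_name]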
Example #2
def replace_with_correct_contig(mongo_source, assembly_accession, study_accession, incorrect_contig, correct_contig,
                                num_variants_to_replace):
    sve_collection = mongo_source.mongo_handle[mongo_source.db_name]["submittedVariantEntity"]
    filter_criteria = {'seq': assembly_accession, 'study': study_accession, 'contig': incorrect_contig}
    cursor = sve_collection.with_options(read_concern=ReadConcern("majority")) \
        .find(filter_criteria, no_cursor_timeout=True).limit(num_variants_to_replace)
    insert_statements = []
    drop_statements = []
    total_inserted, total_dropped = 0, 0
    try:
        for variant in cursor:
            original_id = get_SHA1(variant)
            assert variant['_id'] == original_id, "Original id is different from the one calculated %s != %s" % (
                variant['_id'], original_id)
            variant['contig'] = correct_contig
            variant['_id'] = get_SHA1(variant)
            insert_statements.append(pymongo.InsertOne(variant))
            drop_statements.append(pymongo.DeleteOne({'_id': original_id}))
        result_insert = sve_collection.with_options(write_concern=WriteConcern(w="majority", wtimeout=1200000)) \
            .bulk_write(requests=insert_statements, ordered=False)
        total_inserted += result_insert.inserted_count
        result_drop = sve_collection.with_options(write_concern=WriteConcern(w="majority", wtimeout=1200000)) \
            .bulk_write(requests=drop_statements, ordered=False)
        total_dropped += result_drop.deleted_count
        logger.info('%s / %s new documents inserted' % (total_inserted, num_variants_to_replace))
        logger.info('%s / %s old documents dropped' % (total_dropped, num_variants_to_replace))
    except Exception as e:
        print(traceback.format_exc())
        raise e
    finally:
        cursor.close()
    return total_inserted
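Examples 2, 6, 8 and 16 recompute `get_SHA1` over the variant document to verify and regenerate `_id`. The real implementation belongs to the EVA tooling; the sketch below only illustrates the general idea and assumes the hash covers the seq, study, contig, start, ref and alt fields:

import hashlib

def get_SHA1(variant_rec):
    # Sketch only: hash a deterministic summary of the fields that identify a
    # submitted variant. The exact field list and separator are assumptions;
    # the real function must match how the accessioning service builds _id.
    keys = ['seq', 'study', 'contig', 'start', 'ref', 'alt']
    text = '_'.join(str(variant_rec[key]) for key in keys)
    return hashlib.sha1(text.encode()).hexdigest().upper()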
Example #3
def deprecate(settings_xml_file, study, assembly_accession, contigs=None):
    """
    Connect to mongodb and retrieve all variants that need to be deprecated.
    Copy the variants into the operation collection and delete them from the submitted variant collection.
    """
    with pymongo.MongoClient(get_mongo_uri_for_eva_profile('production', settings_xml_file)) as accessioning_mongo_handle:
        sve_collection = accessioning_mongo_handle['eva_accession_sharded']["submittedVariantEntity"]
        deprecated_sve_collection = accessioning_mongo_handle['eva_accession_sharded']["submittedVariantOperationEntity"]
        cursor = sve_collection.find({'seq': assembly_accession, 'study': study, 'contig': {'$in': contigs}})
        insert_statements = []
        drop_statements = []
        for variant in cursor:
            insert_statements.append(pymongo.InsertOne(inactive_object(variant)))
            drop_statements.append(pymongo.DeleteOne({'_id': variant['_id']}))

        # There should only be 458 variants to deprecate
        assert len(insert_statements) == 458
        assert len(drop_statements) == 458

        logger.info('Found %s variants to deprecate', len(insert_statements))

        result_insert = deprecated_sve_collection.bulk_write(requests=insert_statements, ordered=False)
        result_drop = sve_collection.bulk_write(requests=drop_statements, ordered=False)
        logger.info('There were %s new documents inserted in inactive entities' % result_insert.inserted_count)
        logger.info('There were %s old documents dropped from the submitted variant collection' % result_drop.deleted_count)
Example #4
def do_create(lsst, nobjects=1000):
    try:
        lsst.drop_collection('y')
        print('y dropped')
    except pymongo.errors.PyMongoError:
        pass

    ra = 0
    decl = 0
    window = 180.

    stepper = st.Stepper()
    requests = []
    for i in range(nobjects):
        obj = {
            'loc': [(random.random() * 2 * window - window),
                    (random.random() * 2 * window - window)]
        }
        # lsst.y.insert( obj )
        requests.append(pymongo.InsertOne(obj))

    try:
        lsst.y.bulk_write(requests)
    except BulkWriteError as bwe:
        print('error in bulk write', bwe.details)
        exit()

    stepper.show_step('y created')

    # lsst.Object.aggregate( [ {'$match' : {'chunkId': 516} }, { '$project': { 'loc': [ '$ra', '$decl' ] } }, {'$limit': 1000}, {'$out': 'y'} ] )
    print(lsst.y.count_documents({}))

    result = lsst.y.find()
    for i, o in enumerate(result):
        print(o)
        if i > 10:
            break

    stepper = st.Stepper()
    try:
        lsst.y.create_index([('loc.0', pymongo.ASCENDING)])
    except pymongo.errors.PyMongoError as e:
        print('error create index on ra', e)
    stepper.show_step('index loc.0 creation')

    stepper = st.Stepper()
    try:
        lsst.y.create_index([('loc.1', pymongo.ASCENDING)])
    except pymongo.errors.PyMongoError as e:
        print('error create index on decl', e)
    stepper.show_step('index loc.1 creation')

    stepper = st.Stepper()
    try:
        lsst.y.create_index([('loc', pymongo.GEO2D)])
    except pymongo.errors.PyMongoError as e:
        print('error create_geo_index', e)
    stepper.show_step('index loc creation')

    test9(lsst.y)
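`st.Stepper` and `test9` come from the surrounding benchmark module and are not shown. A plausible sketch of the timing helper, assuming it simply measures elapsed time between steps:

import time

class Stepper:
    # Hypothetical stand-in for st.Stepper: remember a start time and print
    # the elapsed time each time show_step is called.
    def __init__(self):
        self.start = time.time()

    def show_step(self, label):
        elapsed = time.time() - self.start
        print(f"{label}: {elapsed:.3f}s")
        self.start = time.time()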
Example #5
    def test_successful_multiple_queries(self):
        with tracer.start_active_span("test"):
            self.conn.test.records.bulk_write([pymongo.InsertOne({"type": "string"}),
                                               pymongo.UpdateOne({"type": "string"}, {"$set": {"type": "int"}}),
                                               pymongo.DeleteOne({"type": "string"})])

        assert_is_none(tracer.active_span)

        spans = self.recorder.queued_spans()
        self.assertEqual(len(spans), 4)

        test_span = spans.pop()

        seen_span_ids = set()
        commands = []
        for span in spans:
            self.assertEqual(test_span.t, span.t)
            self.assertEqual(span.p, test_span.s)

            # check if all spans got a unique id
            assert_false(span.s in seen_span_ids)

            seen_span_ids.add(span.s)
            commands.append(span.data["mongo"]["command"])

        # ensure spans are ordered the same way as commands
        assert_list_equal(commands, ["insert", "update", "delete"])
Example #6
def get_insert_statements(sve_collection, contig_equivalents):
    wrong_contigs = list(contig_equivalents.keys())
    filter_criteria = {
        'seq': 'GCA_000001895.4',
        'study': 'PRJEB42012',
        'contig': {
            '$in': wrong_contigs
        }
    }
    cursor = sve_collection.with_options(
        read_concern=ReadConcern("majority")).find(filter_criteria,
                                                   no_cursor_timeout=True)
    insert_statements = []
    drop_statements = []
    try:
        for variant in cursor:
            original_id = get_SHA1(variant)
            assert variant['_id'] == original_id, f"Original id is different from the one calculated " \
                                                  f"{variant['_id']} != {original_id}"
            variant['contig'] = contig_equivalents[variant['contig']]
            variant['_id'] = get_SHA1(variant)
            insert_statements.append(pymongo.InsertOne(variant))
            drop_statements.append(pymongo.DeleteOne({'_id': original_id}))
    except Exception as e:
        print(traceback.format_exc())
        raise e
    finally:
        cursor.close()

    return insert_statements, drop_statements
Example #7
    def _bulk_insert_events(self, event_col, descriptor, events, validate, ts):

        descriptor_uid = doc_or_uid_to_uid(descriptor)

        to_write = []
        for ev in events:
            data = dict(ev['data'])

            # Replace any filled data with the datum_id stashed in 'filled'.
            for k, v in six.iteritems(ev.get('filled', {})):
                if v:
                    data[k] = v
            # Convert any numpy types to native Python types.
            apply_to_dict_recursively(data, sanitize_np)
            timestamps = dict(ev['timestamps'])
            apply_to_dict_recursively(timestamps, sanitize_np)

            # check keys, this could be expensive
            if validate:
                if data.keys() != timestamps.keys():
                    raise ValueError(
                        BAD_KEYS_FMT.format(data.keys(),
                                            timestamps.keys()))
            ev_uid = ts + '-' + ev['uid']

            ev_out = dict(descriptor=descriptor_uid, uid=ev_uid,
                          data=data, timestamps=timestamps,
                          time=ev['time'],
                          seq_num=ev['seq_num'])

            to_write.append(pymongo.InsertOne(ev_out))

        event_col.bulk_write(to_write, ordered=True)
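Examples 7 and 25 normalise the descriptor argument with `doc_or_uid_to_uid`. A minimal sketch, assuming the descriptor is either a dict carrying a 'uid' key or the uid string itself:

def doc_or_uid_to_uid(doc_or_uid):
    # Accept either a document with a 'uid' field or a bare uid string and
    # return the uid string. Sketch of the helper assumed by these examples.
    if isinstance(doc_or_uid, dict):
        return doc_or_uid['uid']
    return doc_or_uid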
Example #8
def correct(mongo_user, mongo_password, mongo_host, study, reference_source,
            reference_dest):
    """
    Connect to mongodb and retrieve all variants that should be updated, check their keys and update them in bulk.
    """
    with get_mongo_connection_handle(
            username=mongo_user, password=mongo_password,
            host=mongo_host) as accessioning_mongo_handle:
        sve_collection = accessioning_mongo_handle["eva_accession_sharded"][
            "submittedVariantEntity"]
        cursor = sve_collection.find({'study': study, 'seq': reference_source})
        insert_statements = []
        drop_statements = []
        record_checked = 0
        for variant in cursor:
            # Ensure that the variant we are changing has the expected SHA1
            original_id = get_SHA1(variant)
            assert variant[
                '_id'] == original_id, "Original id is different from the one calculated %s != %s" % (
                    variant['_id'], original_id)
            variant['seq'] = reference_dest
            variant['_id'] = get_SHA1(variant)
            insert_statements.append(pymongo.InsertOne(variant))
            drop_statements.append(pymongo.DeleteOne({'_id': original_id}))
            record_checked += 1

        print('Retrieved %s documents and checked matching SHA1 hash' %
              record_checked)
        result_insert = sve_collection.bulk_write(requests=insert_statements,
                                                  ordered=False)
        print('There were %s new documents inserted' %
              result_insert.inserted_count)
        result_drop = sve_collection.bulk_write(requests=drop_statements,
                                                ordered=False)
        print('There were %s old documents dropped' % result_drop.deleted_count)
Example #9
def bulkwrite(coll,
              objs,
              append=False):  # append is used during bson/json export
    if gen.dumpDir is not None:
        if not os.path.isdir(gen.dumpDir):
            os.makedirs(gen.dumpDir, exist_ok=True)
        if not os.path.isdir(gen.dumpDir):
            raise FileNotFoundError(gen.dumpDir)
        ext: str = "bson" if gen.bsonMode else "json"
        outpath: str = os.path.join(gen.dumpDir, f"{coll.name}.{ext}")
        openmode = ("a" if append else "w") + ("b" if gen.bsonMode else "")
        with open(outpath, openmode) as f:
            for o in objs:
                if gen.bsonMode:
                    f.write(bson.encode(o.__dict__))
                else:
                    f.write(json.dumps(o.__dict__, default=str, indent=4))
        print(f"Colleciton {coll.name}: dumped to {outpath}")
    else:
        ledger = []
        for x in objs:
            ledger.append(pymongo.DeleteOne({"_id": x._id}))
            ledger.append(pymongo.InsertOne(x.__dict__))
        res = coll.bulk_write(ledger)
        print(f"Collection {coll.name}: {res.bulk_api_result}")
Example #10
    def runTestTrialThread(self, testIdx):
        # Perform inserts
        batch = []
        errors = []
        runTime = 0
        client = pymongo.MongoClient(self.connString)
        mongoColl = client[self.dbName][self.collName].with_options(
            write_concern=pymongo.write_concern.WriteConcern(w=1)
        )
        for i in range(0, self.numDocsToInsert):
            batch.append(pymongo.InsertOne(self.documentProvider.createDocument(testIdx, i)))

            if i % self.insertBatchSize == 0:
                startTime = time.time()
                try:
                    mongoColl.bulk_write(batch, ordered=False)
                except pymongo.errors.BulkWriteError as e:
                    for x in e.details[u'writeErrors']:
                        error_id = x[u'op']['_id']
                        errors.append(get_document(batch, error_id))
                runTime += (time.time() - startTime)
                batch = []
        if batch:
            startTime = time.time()
            try:
                mongoColl.bulk_write(batch, ordered=False)
            except pymongo.errors.BulkWriteError as e:
                for x in e.details[u'writeErrors']:
                    error_id = x[u'op']['_id']
                    errors.append(get_document(batch, error_id))
            runTime += (time.time() - startTime)
        batch = []
        return runTime
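The error-handling branches call a `get_document(batch, error_id)` helper that is not shown. A possible sketch, which relies on the private `_doc` attribute that `pymongo.InsertOne` uses to store its document (a production version would more likely keep the raw documents in a parallel list):

def get_document(batch, error_id):
    # Sketch: recover the document whose insert failed by matching its _id
    # against the documents wrapped in the batched InsertOne requests.
    for request in batch:
        doc = getattr(request, '_doc', None)
        if doc is not None and doc.get('_id') == error_id:
            return doc
    return None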
Example #11
def store_urls_mongo(url_docs):
    '''store url documents in mongodb.
    On error, this function mimics the behavior of the invoke function where it
    returns a dict with a key "http_status_code" equal to 500.
    '''
    connect_str = os.getenv('MONGO_URI')
    try:
        client = pymongo.mongo_client.MongoClient(connect_str)
    except:
        return {
            "http_status_code": 500,
            "error": "MongoDB failed to connect to " + connect_str
        }

    db = client.url_shorten
    collection = db.url_shorten

    requests = [pymongo.InsertOne(doc) for doc in url_docs]

    ret = collection.bulk_write(requests)

    if ret.inserted_count != len(url_docs):
        return {
            "http_status_code":
            500,
            "error":
            "Some inserts failed: " + str(len(url_docs)) + " attempts, " +
            str(ret.inserted_count) + " successes"
        }

    return {"http_status_code": 200, "status": "success"}
Example #12
def write_reactions_to_mine(reactions: List[dict],
                            db: MINE,
                            chunk_size: int = 10000) -> None:
    """Write reactions to reaction collection of MINE.

    Parameters
    ----------
    reactions : List[dict]
        List of reaction dictionaries to write.
    db : MINE
        MINE object to write reactions with.
    chunk_size : int, optional
        Size of chunks to break reactions into when writing, by default 10000.
    """
    n_rxns = len(reactions)
    for i, rxn_chunk in enumerate(utils.Chunks(reactions, chunk_size)):
        if i % 20 == 0:
            print(
                f"Writing Reactions: Chunk {i} of {int(n_rxns/chunk_size) + 1}"
            )
        rxn_requests = [
            pymongo.InsertOne(utils.convert_sets_to_lists(rxn_dict))
            for rxn_dict in rxn_chunk
        ]

        db.reactions.bulk_write(rxn_requests, ordered=False)
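`utils.Chunks` is part of the MINE utilities and is not shown here. Functionally it just has to yield successive slices of `chunk_size` reactions; a simple sketch of such a generator (the real class may behave differently):

def chunks(items, chunk_size):
    # Yield successive chunk_size-sized slices of a list. Illustrative
    # replacement for utils.Chunks, whose exact behaviour is assumed.
    for start in range(0, len(items), chunk_size):
        yield items[start:start + chunk_size]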
Example #13
    def _push_similarity(self, client):
        similarity_collection = client[DB_NAME][SIMILARITY_COLLECTION_NAME]
        similarity_collection.drop()

        bulk = []
        for game_id, similarities in self.game_similar.items():
            top_similar = similarities.most_common(SIMILARITY_CUTOFF)
            similar = []
            for item_id, score in top_similar:
                if item_id in self.game_ratings:
                    game_rating = self.game_ratings[item_id]
                    score = score * TEXT_SIMILARITY_FACTOR + (
                        game_rating / MAX_GAME_RATING) * RATING_FACTOR
                similar.append({'itemId': item_id, 'score': float(score)})

            bulk.append(pymongo.InsertOne({
                'itemId': game_id,
                'similar': sorted(similar, key=lambda x: x['score'], reverse=True)
            }))

        similarity_collection.bulk_write(bulk)
        similarity_collection.create_index([('itemId', pymongo.ASCENDING)],
                                           unique=True)
Example #14
def deprecate(settings_xml_file, database_name, contigs=None):
    """
    Connect to mongodb and retrieve all variants that need to be deprecated.
    Copy the variants into the deletion collection and delete them from the variant collection.
    """
    with pymongo.MongoClient(
            get_mongo_uri_for_eva_profile('production',
                                          settings_xml_file)) as mongo_handle:
        variant_collection = mongo_handle[database_name]['variants_2_0']
        deleted_variant_collection = mongo_handle[database_name][
            'to_delete_variants_2_0']

        cursor = variant_collection.find({'chr': {'$in': contigs}})
        drop_statements = []
        insert_statements = []
        for variant in cursor:
            insert_statements.append(pymongo.InsertOne(variant))
            drop_statements.append(pymongo.DeleteOne({'_id': variant['_id']}))

        logger.info('Found %s variants to remove', len(drop_statements))
        result_insert = deleted_variant_collection.bulk_write(
            requests=insert_statements, ordered=False)
        result_drop = variant_collection.bulk_write(requests=drop_statements,
                                                    ordered=False)
        logger.info('There were %s new documents inserted in to_delete collection' %
                    result_insert.inserted_count)
        logger.info('There were %s documents dropped from the variants collection' %
                    result_drop.deleted_count)
Example #15
def build_request(words, fields, schema):
    obj = dict()
    for item, word in enumerate(words):
        field = fields[item]

        if field not in schema.fields:
            print('error')
            return None

        ftype = schema.fields[field]

        if word == 'NULL':
            # NULL values are skipped entirely
            continue

        value = word  # default: keep the value as a string
        try:
            if ftype in ('bit(1)', 'int(11)', 'bigint(20)'):
                value = int(word)
            elif ftype in ('double', 'float'):
                value = float(word)
        except ValueError:
            # conversion failed: keep value as a string
            print('field:', field, 'value:', value)

        # print(field, '=', value)
        obj[field] = value

    return pymongo.InsertOne(obj)
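A hypothetical way to drive `build_request`: the `Schema` stub, field names and input line below are illustrative only, not part of the original code.

class Schema:
    # Minimal stand-in for the schema object expected by build_request.
    def __init__(self, fields):
        self.fields = fields

schema = Schema({'id': 'bigint(20)', 'ra': 'double', 'decl': 'double'})
fields = ['id', 'ra', 'decl']
line = "42\t10.5\t-3.25"

request = build_request(line.split('\t'), fields, schema)
# Requests built this way can then be passed to collection.bulk_write([...]).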
Example #16
def do_updates(cve_collection, synonym_dictionaries, assembly_accession,
               chunk_size, number_of_variants_to_replace):
    cursor = cve_collection.find({'asm': assembly_accession},
                                 no_cursor_timeout=True)
    insert_statements = []
    drop_statements = []
    record_checked = 0
    already_genbanks = 0
    total_inserted = 0
    total_dropped = 0
    logging.info("Performing updates...")
    try:
        for variant in cursor:
            # Ensure that the variant we are changing has the expected SHA1
            original_id = get_SHA1(variant)
            assert variant[
                '_id'] == original_id, "Original id is different from the one calculated %s != %s" % (
                    variant['_id'], original_id)
            genbank, was_already_genbank = get_genbank(synonym_dictionaries,
                                                       variant['contig'])
            if was_already_genbank:
                already_genbanks += 1
            else:
                variant['contig'] = genbank
                variant['_id'] = get_SHA1(variant)
                insert_statements.append(pymongo.InsertOne(variant))
                drop_statements.append(pymongo.DeleteOne({'_id': original_id}))
            record_checked += 1
            if len(insert_statements) >= chunk_size:
                total_inserted, total_dropped = execute_bulk(
                    drop_statements, insert_statements, cve_collection,
                    total_dropped, total_inserted)
                logging.info('%s / %s new documents inserted' %
                             (total_inserted, number_of_variants_to_replace))
                logging.info('%s / %s old documents dropped' %
                             (total_dropped, number_of_variants_to_replace))
    except Exception as e:
        print(traceback.format_exc())
        raise e
    finally:
        cursor.close()

    if len(insert_statements) > 0:
        total_inserted, total_dropped = execute_bulk(drop_statements,
                                                     insert_statements,
                                                     cve_collection,
                                                     total_dropped,
                                                     total_inserted)
    logging.info('Retrieved %s documents and checked matching SHA1 hash' %
                 record_checked)
    logging.info(
        '{} of those documents already had a GenBank contig. If the projects were all affected, '
        'that number should be 0, but even if it is not, there is nothing else to fix'
        .format(already_genbanks))
    logging.info('There were %s new documents inserted' % total_inserted)
    logging.info('There were %s old documents dropped' % total_dropped)
    return total_inserted
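`execute_bulk` is defined elsewhere. Judging from the call sites, it has to run both bulk writes, update the running totals and empty the statement lists in place so the caller can keep accumulating into them; a sketch along those lines (the exact behaviour is an assumption):

def execute_bulk(drop_statements, insert_statements, collection,
                 total_dropped, total_inserted):
    # Insert the corrected documents, then drop the originals, and clear both
    # request lists in place so the caller's lists start empty again.
    result_insert = collection.bulk_write(requests=insert_statements, ordered=False)
    total_inserted += result_insert.inserted_count
    result_drop = collection.bulk_write(requests=drop_statements, ordered=False)
    total_dropped += result_drop.deleted_count
    insert_statements.clear()
    drop_statements.clear()
    return total_inserted, total_dropped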
Example #17
def main():
    with open('../warehouses_de.geojson') as json_file:
        data = json.load(json_file)
    batch = []
    for w in data['features']:
        batch.append(pymongo.InsertOne(w))
        write_batch(batch=batch, collection=warehouse_coll, full_batch_required=True)

    write_batch(batch=batch, collection=warehouse_coll, full_batch_required=False)
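`write_batch` and `warehouse_coll` come from the surrounding module. Consistent with how it is called above, here is a sketch in which the batch is only flushed once it reaches an assumed threshold, unless `full_batch_required` is False (the threshold value is an assumption):

BATCH_SIZE = 1000  # assumed flush threshold

def write_batch(batch, collection, full_batch_required):
    # Flush the accumulated InsertOne requests either when the batch is full
    # or, when full_batch_required is False, whatever its current size.
    if full_batch_required and len(batch) < BATCH_SIZE:
        return
    if batch:
        collection.bulk_write(batch, ordered=False)
        batch.clear()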
Example #18
    def insert_data(
        self,
        data,
        tags=None,
    ):
        if tags is not None:
            for tag, tag_val in tags.items():
                data[tag] = tag_val

        self.enqueue(self.collection, pymongo.InsertOne(data))
Example #19
def _get_cpd_insert(cpd_dict: dict):
    output_keys = [
        "_id",
        "ID",
        "SMILES",
        "InChI_key",
        "Type",
        "Generation",
        "Expand",
        "Reactant_in",
        "Product_of",
        "Matched_Peak_IDs",
        "Matched_Adducts",
        "Predicted_RT",
    ]

    # create Reactant_in
    reactant_in_requests = []
    product_of_requests = []
    insert_dict = {
        key: cpd_dict.get(key)
        for key in output_keys if cpd_dict.get(key) is not None
    }
    if "Reactant_in" in insert_dict:
        chunked_reactant_in = _get_reactant_in_insert(cpd_dict)
        insert_dict["Reactant_in"] = []
        for r_in_dict in chunked_reactant_in:
            reactant_in_requests.append(pymongo.InsertOne(r_in_dict))
            insert_dict["Reactant_in"].append(r_in_dict["_id"])

    # create Product_of
    if "Product_of" in insert_dict:
        chunked_product_of = _get_product_of_insert(cpd_dict)
        insert_dict["Product_of"] = []
        for p_of_dict in chunked_product_of:
            product_of_requests.append(pymongo.InsertOne(p_of_dict))
            insert_dict["Product_of"].append(p_of_dict["_id"])

    cpd_request = pymongo.InsertOne(insert_dict)
    return cpd_request, reactant_in_requests, product_of_requests
Example #20
    def insert_data(
        self,
        data,
        collection='generic',
        tags=None,
    ):
        if collection is None:
            raise errors.Invalid("cannot insert data: no collection given")

        if tags is not None:
            for tag, tag_val in tags.items():
                data[tag] = tag_val

        self.enqueue(collection, pymongo.InsertOne(data))
Example #21
def test_collection_bulk_write(elasticapm_client, mongo_database):
    elasticapm_client.begin_transaction('transaction.test')
    requests = [pymongo.InsertOne({'x': 1}),
                pymongo.DeleteOne({'x': 1}),
                pymongo.ReplaceOne({'w': 1}, {'z': 1}, upsert=True)]
    result = mongo_database.blogposts.bulk_write(requests)
    assert result.inserted_count == 1
    assert result.deleted_count == 1
    assert result.upserted_count == 1
    elasticapm_client.end_transaction('transaction.test')
    transactions = elasticapm_client.instrumentation_store.get_all()
    span = _get_pymongo_trace(transactions[0]['spans'])
    assert span['type'] == 'db.mongodb.query'
    assert span['name'] == 'elasticapm_test.blogposts.bulk_write'
Example #22
def insert_reaction(reaction_dict):
    """Inserts a reaction into the MINE database and returns _id of the
        reaction in the mine database. 

    :param reaction_dict: A dictionary containing 'Reactants' and
        'Products' lists of StoichTuples
    :type reaction_dict: dict    
    :return: Request for bulk insert
    :rtype: pymongo.InsertOne
    """

    reaction_dict = utils.convert_sets_to_lists(reaction_dict)

    return pymongo.InsertOne(reaction_dict)
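A hypothetical usage of `insert_reaction`, collecting the requests and writing them in one bulk operation (the `reaction_dicts` list and `db.reactions` handle are illustrative, not part of the original code):

# Build one InsertOne request per reaction, then write them all at once.
requests = [insert_reaction(rxn) for rxn in reaction_dicts]
if requests:
    db.reactions.bulk_write(requests, ordered=False)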
Example #23
def researchers_collaboration_network():
    '''
    There are some problems with the researchers_con_innewcollection network, so we use another method instead,
    i.e. we build the collaboration network first.
    :return:
    '''

    start_time = time()
    print(start_time, flush=True)
    col1 = connectTable("qiuzh", "mag_papers0510")
    col2 = connectTable("qiuzh", "mag_researchers0707")
    col3 = connectTable("qiuzh", "coauthor_network0722")
    operation = []
    cursor = col2.find(no_cursor_timeout=True)
    count = 0
    for i in cursor:
        count += 1
        author_id = i["_id"]
        # coauthor_times = 0
        # coauthor_list = []
        papers = i["new_pubs"]
        for paper in papers:
            paper_details = col1.find_one({"_id": paper},
                                          no_cursor_timeout=True)
            for author in paper_details["authors"]:
                if author["id"] != author_id and col2.find_one(
                    {"_id": author["id"]}, no_cursor_timeout=True):
                    # coauthor_list.append({"coauthor_id": author["id"], "coauthor_time": paper_details["year"]})
                    operation.append(pymongo.InsertOne({
                        "author_id": author_id,
                        "coauthor_id": author["id"],
                        "coauthor_time": paper_details["year"],
                    }))
        if count % 10000 == 0:
            print("Processed (x10000):", count / 10000, flush=True)
            if operation:
                col3.bulk_write(operation, ordered=False)
                print("Written up to (x10000):", count / 10000, flush=True)
                operation = []
            print(time(), flush=True)
    if operation:
        col3.bulk_write(operation, ordered=False)
    print("Finished, final batch size:", len(operation), flush=True)
    print(time(), (time() - start_time), flush=True)
Example #24
def add_coauthor_relation2newcollection():
    '''
    Coauthor counts and coauthor relationships.
    :return:
    mag_authors0411:
    {coauthor_counts: n}
    {coauthor_list: [{year: 1999, id: 1000000}, {year: 1998, id: 1000001}]}
    Because some authors in the dataset have too many collaborations and would exceed the maximum size of a document,
    we store the relations in a new collection with the fields:
    _id:
    "author_id":
    "coauthor_id":
    "coauthor_time":
    '''

    start_time = time()
    print(start_time,flush=True)
    col1 = connectTable("qiuzh", "mag_papers0415")
    col2 = connectTable("qiuzh", "mag_authors0411")
    col3 = connectTable("qiuzh", "coauthor_network0420")
    operation = []
    cursor = col2.find(no_cursor_timeout=True)[3790001:]
    count = 0
    for i in cursor:
        count += 1
        author_id = i["_id"]
        # coauthor_times = 0
        # coauthor_list = []
        papers = i["new_pubs"]
        for paper in papers:
            paper_details = col1.find_one({"_id": paper})
            # if paper_details:
            # coauthor_times += (len(paper_details["authors"]) - 1)
            for author in paper_details["authors"]:
                if author["id"] != author_id:
                    # coauthor_list.append({"coauthor_id": author["id"], "coauthor_time": paper_details["year"]})
                    operation.append(pymongo.InsertOne(
                        {"author_id": author_id, "coauthor_id": author["id"], "coauthor_time": paper_details["year"],
                         }))
        if count % 10000 == 0:
            print("Processed (x10000):", count / 10000, flush=True)
            if operation:
                col3.bulk_write(operation, ordered=False)
                print("Written up to (x10000):", count / 10000, flush=True)
                operation = []
            print(time(), flush=True)
    if operation:
        col3.bulk_write(operation, ordered=False)
    print("Finished, final batch size:", len(operation), flush=True)
    print(time(), (time() - start_time), flush=True)
Example #25
def bulk_insert_events(event_col, descriptor, events, validate):
    """Bulk insert many events

    Parameters
    ----------
    descriptor : dict or str
        The Descriptor to insert event for.  Can be either
        a dict with a 'uid' key or a uid string
    events : iterable
       iterable of dicts matching the bs.Event schema
    validate : bool
       If it should be checked that each pair of data/timestamps
       dicts has identical keys

    Returns
    -------
    ret : pymongo.results.BulkWriteResult
        result of the bulk write, with details about the insertion
    """
    descriptor_uid = doc_or_uid_to_uid(descriptor)

    def event_factory():
        for ev in events:
            data = dict(ev['data'])
            # Replace any filled data with the datum_id stashed in 'filled'.
            for k, v in six.iteritems(ev.get('filled', {})):
                if v:
                    data[k] = v
            # Convert any numpy types to native Python types.
            apply_to_dict_recursively(data, sanitize_np)
            timestamps = dict(ev['timestamps'])
            apply_to_dict_recursively(timestamps, sanitize_np)
            # check keys, this could be expensive
            if validate:
                if data.keys() != timestamps.keys():
                    raise ValueError(
                        BAD_KEYS_FMT.format(data.keys(), timestamps.keys()))

            ev_out = dict(descriptor=descriptor_uid,
                          uid=ev['uid'],
                          data=data,
                          timestamps=timestamps,
                          time=ev['time'],
                          seq_num=ev['seq_num'])
            yield ev_out

    bulk = [pymongo.InsertOne(ev) for ev in event_factory()]
    return event_col.bulk_write(bulk, ordered=True)
Example #26
def filter_researchers_paper_by_authors():
    '''
    From mag_researchers0707 (pubs >= 10, academic career length >= 10) to mag_researchers0810: only papers with
    at most 10 authors are kept in the dataset.
    :return:
    This function was created on 2021-08-10.
    '''
    # col2 = connectTable('qiuzh', "mag_researchers0707")
    # col2.drop()
    col1 = connectTable('qiuzh', "mag_researchers0707")
    col2 = connectTable('qiuzh', "mag_researchers0810")
    col_paper = connectTable("qiuzh", "mag_papers0510")
    cursor = col1.find(no_cursor_timeout=True)
    opt = []
    count = 0
    print(col1.count_documents({}))
    for i in cursor:
        count += 1
        pubs = i["new_pubs"]
        new_pubs = []
        for pub in pubs:
            paper = col_paper.find_one({"_id": pub["pid"]})
            if len(paper["authors"]) <= 10:
                new_pubs.append(pub)
        opt.append(
            pymongo.InsertOne({
                "_id": i["_id"],
                "new_pubs": new_pubs,
                "pub_count": i["pub_count"],
                "first_year": i["first_year"],
                "last_year": i["last_year"],
                "cn": i["cn"]
            }))
        if count % 10000 == 0:
            print(len(opt))
            print(count)
            print("Processed (x10000):", count / 10000, flush=True)
            col2.bulk_write(opt, ordered=False)
            print("Written up to (x10000):", count / 10000, flush=True)
            opt = []
    if opt:
        col2.bulk_write(opt, ordered=False)
        print("Wrote final batch of", len(opt))
    print(count)
    cursor.close()
Example #27
def filter_author_by_careerlife(begin, end, msg):
    '''
    :param msg:
    :param begin:
    :param end:
    :return: authors with an academic career spanning at least 20 years
    '''
    col1 = connectTable('qiuzh', "mag_authors0421")
    col2 = connectTable('qiuzh', "mag_authors0411")
    cursor = col2.find(no_cursor_timeout=True)[begin:end]
    opt = []
    for i in cursor:
        if i["last_year"] - i["first_year"] >= 20:
            opt.append(pymongo.InsertOne({"_id": i["_id"], "new_pubs": i["new_pubs"], "pub_count": i["pub_count"],
                                          "first_year": i["first_year"], "last_year": i["last_year"]}))
    if opt:
        col1.bulk_write(opt, ordered=False)
    cursor.close()
Example #28
def filter_author_by_citation(begin, end, msg):
    '''
    :param msg: multi-process information
    :param begin: i-th
    :param end: i+1-th
    :return: authors with at least 5 publications
    '''
    col1 = connectTable("academic", "mag_authors")
    col2 = connectTable('qiuzh', "MAG_authors")
    opt = []
    # count = 0
    for i in col1.find({"n_pubs": {"$gte": 5}})[begin:end]:
        opt.append(pymongo.InsertOne(i))
    if opt:
        col2.bulk_write(opt, ordered=False)
    print("Thread: %s, processed %s documents" % (msg, len(opt)))
Example #29
    def test_collection_bulk_write(self):
        self.client.begin_transaction('transaction.test')
        requests = [
            pymongo.InsertOne({'x': 1}),
            pymongo.DeleteOne({'x': 1}),
            pymongo.ReplaceOne({'w': 1}, {'z': 1}, upsert=True)
        ]
        result = self.db.blogposts.bulk_write(requests)
        self.assertEqual(result.inserted_count, 1)
        self.assertEqual(result.deleted_count, 1)
        self.assertEqual(result.upserted_count, 1)
        self.client.end_transaction('transaction.test')
        transactions = self.client.instrumentation_store.get_all()
        trace = _get_pymongo_trace(transactions[0]['traces'])
        self.assertEqual(trace['type'], 'db.mongodb.query')
        self.assertEqual(trace['name'], 'elasticapm_test.blogposts.bulk_write')
Example #30
def test_collection_bulk_write(instrument, elasticapm_client, mongo_database):
    elasticapm_client.begin_transaction("transaction.test")
    requests = [
        pymongo.InsertOne({"x": 1}),
        pymongo.DeleteOne({"x": 1}),
        pymongo.ReplaceOne({"w": 1}, {"z": 1}, upsert=True),
    ]
    result = mongo_database.blogposts.bulk_write(requests)
    assert result.inserted_count == 1
    assert result.deleted_count == 1
    assert result.upserted_count == 1
    elasticapm_client.end_transaction("transaction.test")
    transactions = elasticapm_client.transaction_store.get_all()
    span = _get_pymongo_trace(transactions[0]["spans"])
    assert span["type"] == "db.mongodb.query"
    assert span["name"] == "elasticapm_test.blogposts.bulk_write"