def merge(mongo_collection=None, drop=True):
    ## merge docs
    if mongo_collection:
        mydisease = mongo_collection
    else:
        client = MongoClient()
        mydisease = client.mydisease.mydisease
    if drop:
        mydisease.drop()
    g = build_id_graph()
    # make initial primary d with all DOID docs
    db = MongoClient().mydisease.disease_ontoloy
    d = [{'_id': doc['_id'], 'disease_ontology': doc} for doc in db.find()]
    mydisease.insert_many(d)
    # fill in from other sources
    for db_name in tqdm(set(db_names) - {'disease_ontoloy'}):
        print(db_name)
        db = MongoClient().mydisease[db_name]
        if db.count() == 0:
            print("Warning: {} is empty".format(db))
        for doc in db.find():
            doids = get_equiv_doid(g, doc['_id'])
            for doid in doids:
                mydisease.update_one({'_id': doid}, {'$push': {db_name: doc}}, upsert=True)
def __init__(self, page, username):
    posts = MongoClient().blog.Aritical.find({
        'username': username
    }).sort('issuing_time', DESCENDING)
    self.total = posts.count()
    self.pages = int(self.total / 20)
    if self.total % 20 != 0:
        self.pages += 1
    if page == 1:
        self.has_prev = False
    else:
        self.has_prev = True
    if page == self.pages:
        self.has_next = False
    else:
        self.has_next = True
    self.next_num = page + 1
    self.page = page
    self.per_page = 20
    self.prev_num = page - 1
    self.current_num = self.total - (20 * (page - 1))
    if self.current_num > 20:
        self.current_num = 20
    self.item = []
    for i in range(self.current_num):
        self.item.append(posts[self.prev_num * 20 + i])
def run(host=None, db=None, coll=None, node=None, outgoing="true", incoming="true", undirected="true"):
    # Connect to the mongo collection.
    graph = MongoClient(host)[db][coll]

    outgoing = json.loads(outgoing)
    incoming = json.loads(incoming)
    undirected = json.loads(undirected)

    # Construct the query according to the given options.
    query = {"type": "link"}
    clauses = []
    oid = ObjectId(node)
    if outgoing or incoming:
        dirclauses = []
        orclause = {"$or": [{"undirected": {"$not": {"$exists": 1}}},
                            {"undirected": False}]}
        if outgoing:
            dirclauses.append({"source": oid})
        if incoming:
            dirclauses.append({"target": oid})
        clauses.append({"$and": [orclause, {"$or": dirclauses}]})
    if undirected:
        clauses.append({"$and": [{"undirected": True},
                                 {"$or": [{"source": oid}, {"target": oid}]}]})
    query["$or"] = clauses
    return json.dumps(graph.count(query))
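# Hedged usage sketch (not from the source): counting "link" documents that
# touch one node. The db/collection names and the ObjectId are placeholders.
print(run(host='localhost', db='graphs', coll='edges',
          node='5f0000000000000000000000',
          outgoing='true', incoming='false', undirected='true'))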
class MongoIterator(object):
    def __init__(self, uri, db, collection, skip=0, limit=0, filter=None):
        self._collection = MongoClient(uri)[db][collection]
        self._skip = skip
        self._limit = limit
        self._filter = filter

    def __iter__(self):
        return self.stream()

    def stream(self, conditions=None, projection=None, skip=None, limit=None):
        proj = {k: 1 for k in projection} if projection else {}
        if proj:
            proj.update({'_id': False})  # skip internal id
        return self._collection.find(conditions or self._filter,
                                     proj or None,
                                     skip=skip or self._skip,
                                     limit=limit or self._limit)

    def size(self):
        return self._collection.count() if not self._filter else self._collection.find(self._filter).count()

    @property
    def filter(self):
        return self._filter

    @filter.setter
    def filter(self, conditions):
        self._filter = conditions
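# Hedged usage sketch (assumption, not from the source): stream up to 10
# English documents from a hypothetical "test.docs" collection on a local mongod.
it = MongoIterator('mongodb://localhost:27017', 'test', 'docs', limit=10)
it.filter = {'lang': 'en'}
for doc in it:
    print(doc)
print(it.size())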
def upload(source_json, source, db_name=DB_NAME, coll_name=VERBS, drop=False, indices=(VERB, PARADIGM)):
    target = MongoClient(LOCALHOST, PORT)[db_name][coll_name]
    if drop:
        target.drop()
    print('Initially,', target.count(), 'entries')
    count = counter()
    for line in read_json_lines(source_json):
        next(count)
        line[SOURCE] = source
        target.insert(line)
    add_indices(target, indices)
    print('\nCurrently,', target.count(), 'entries')
class TvrainData:
    def __init__(self):
        """
        Just load data from Mongo.
        """
        self.sequences = MongoClient(os.environ['MONGODB_URL']).tvrain.sequences
        self.collection = MongoClient(os.environ['MONGODB_URL']).tvrain.articles
        self.collection.create_index("time")

    def get_random_articles(self, n):
        """Return n articles from a random offset, for index.html"""
        articles = self.collection.find().sort("time", 1).skip(
            random.randint(0, self.collection.count())).limit(n)
        return list(articles)

    def get_article_id(self, url):
        """Get id by url"""
        return self.collection.find_one({'url': url})['_id']

    def get_articles_data(self, articles_urls):
        """
        Get data from MongoDB for articles urls
        :param articles_urls: ['article_url', ...]
        :return: list of MongoDB documents
        """
        articles = []
        for url in articles_urls:
            articles.append(self.collection.find_one({'url': url}))
        return articles

    def iterate_articles(self, except_articles, skip=0, limit=None, query=None):
        """
        Iterate through all articles, skipping the ids in except_articles
        :param except_articles: list of ids
        :return:
        """
        if query is None:
            query = {}
        if limit is None:
            data = self.collection.find(query).skip(skip)
        else:
            data = self.collection.find(query).skip(skip).limit(limit)
        for value in data:
            if value['_id'] not in except_articles:
                yield value

    def get_sequences(self):
        """Return all sequences for train"""
        return list(self.sequences.find().limit(-1))
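# Hedged usage sketch (assumption): MONGODB_URL must point at a mongod whose
# "tvrain" database holds "articles" and "sequences" collections; the 'url'
# field is assumed from get_article_id above.
tv = TvrainData()
for article in tv.iterate_articles(except_articles=[], limit=3):
    print(article['url'])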
def merge_one(db_name):
    mydisease = MongoClient().mydisease.mydisease
    g = build_id_graph()
    db = MongoClient().mydisease[db_name]
    if db.count() == 0:
        print("Warning: {} is empty".format(db))
    for doc in db.find():
        doids = get_equiv_doid(g, doc['_id'])
        for doid in doids:
            mydisease.update_one({'_id': doid}, {'$push': {db_name: doc}}, upsert=True)
class BigramsCorpus:
    def __init__(self, db, collection):
        self.client = MongoClient()[db][collection]

    def __iter__(self):
        for doc in self.client.find():
            yield [doc['_id']]

    def __len__(self):
        return self.client.count()
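# Hedged usage sketch (not from the source): the corpus streams one-element
# lists of document ids; the db/collection names below are placeholders.
corpus = BigramsCorpus('corpus', 'bigrams')
print(len(corpus))
for bigram in corpus:
    print(bigram)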
def __init__(self, page, show_follow):
    if show_follow == 0:
        posts = MongoClient().blog.Aritical.find().sort(
            'issuing_time', DESCENDING)
        self.total = posts.count()
        self.posts = posts
    if show_follow == 1:
        self.posts = []
        following = MongoClient().blog.User.find_one({
            'username': current_user.username
        }).get('following')
        artical = MongoClient().blog.Aritical.find().sort(
            'issuing_time', DESCENDING)
        # following.append([current_user.username, 'date'])
        for i in range(len(following)):
            for x in range(artical.count()):
                if following[i][0] == artical[x].get('username'):
                    self.posts.append(artical[x])
        self.posts.sort(key=lambda x: x.get('issuing_time'), reverse=True)
        self.total = len(self.posts)
    self.pages = int(self.total / 20)
    if self.total % 20 != 0:
        self.pages += 1
    if page == 1:
        self.has_prev = False
    else:
        self.has_prev = True
    if page == self.pages:
        self.has_next = False
    else:
        self.has_next = True
    self.next_num = page + 1
    self.page = page
    self.per_page = 20
    self.prev_num = page - 1
    self.current_num = self.total - (20 * (page - 1))
    if self.current_num > 20:
        self.current_num = 20
    self.item = []
    for i in range(self.current_num):
        self.item.append(self.posts[self.prev_num * 20 + i])
class Stat:
    def __init__(self, config_file):
        self.docker = dockerGuest(config_file)
        self.config_file = config_file
        self.collection = MongoClient(os.environ['DB_PORT_27017_TCP_ADDR'],
                                      27017)['test'][self.get_collection()]

    def get_collection(self):
        dir_path = os.path.dirname(os.path.realpath(__file__))
        file_path = os.path.join(dir_path, self.config_file)
        return json.load(open(file_path))["collection"]

    def get_config_file(self):
        return self.config_file

    @staticmethod
    def get_time():
        time = datetime.now()
        return time
        # return time.strftime('%H:%M:%S')

    def data_to_save(self):
        stats = self.docker.get_stats()
        if stats is None:
            return None
        global_stat_dict = {}
        global_stat_dict["time"] = Stat.get_time()
        global_stat_dict["stats"] = stats
        return global_stat_dict

    def save(self, cap=60):
        new_data = self.data_to_save()
        if self.collection.count() == cap:
            top_doc_time = min(doc['time'] for doc in self.collection.find())
            self.collection.delete_one({'time': top_doc_time})
        self.collection.insert_one(new_data)
        logger.info("Saved in DB...")

    def save_to_db(self):
        data = self.data_to_save()
        if data is not None:
            if self.is_db_full():
                self.make_space_db()
            logger.info('DB Save')
            self.collection.insert_one(data)

    def make_space_db(self):
        logger.info('Making space')
        self.collection.delete_one({'_id': self.collection.find()[0]['_id']})

    def is_db_full(self):
        if self.collection.find({}).count() == 60:
            return True
        return False
class GraphData(object):
    _instance = None
    _instance_lock = Lock()
    host = '101.132.40.25'
    port = 27017

    def __init__(self):
        self.table = MongoClient(host=GraphData.host,
                                 port=GraphData.port).get_database(
                                     'judging').get_collection('graph')

    def __new__(cls, *args, **kwargs):
        """
        singleton

        > Multiple `GraphData()` calls return the same instance

        :param args:
        :param kwargs:
        :return:
        """
        if GraphData._instance is None:
            with GraphData._instance_lock:
                if GraphData._instance is None:
                    GraphData._instance = object.__new__(cls)
        return GraphData._instance

    def exists(self, graph_name: str) -> bool:
        return self.table.count({'_id': graph_name}) > 0

    def save(self, graph: dict) -> bool:
        """
        **Attention: this method will override an existing graph**

        :param graph:
        :return:
        """
        self.table.save({**{'_id': graph['名称']}, **graph})
        return True

    def fetch(self, graph_name: str) -> dict:
        return self.table.find_one({'_id': graph_name})

    def get_graph_list(self) -> List[str]:
        graph_list = []
        for i in self.table.find({}, {'名称': 1, '_id': 0}):
            graph_list.append(i['名称'])
        return graph_list

    def remove_graph(self, graph_name: str):
        # pymongo's remove() treats a non-dict argument as an _id value
        self.table.remove(graph_name)
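# Hedged usage sketch (assumption): because of the __new__ override, every
# GraphData() call yields the same instance; the graph payload is made up.
a = GraphData()
b = GraphData()
assert a is b
a.save({'名称': 'demo-graph', 'nodes': []})  # '名称' ("name") is the required key
print(a.exists('demo-graph'))
print(a.fetch('demo-graph'))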
def remove_morphologically_abnormal_verbs():
    abnormal_count = 0
    coll = MongoClient(LOCALHOST, PORT)[DB_NAME][VERBS]
    count = counter(coll.count())
    for entry in coll.find():
        next(count)
        verbs = entry[VERB]
        if isinstance(verbs, str):
            verbs = [verbs]
        for verb in verbs:
            if not (verb.endswith('ω') or verb.endswith('ώ') or verb.endswith('αι')):
                coll.delete_one({VERB: verb})
                abnormal_count += 1
    print("\nRemoved {} abnormal verbs".format(abnormal_count))
def setUpDb(self, host, port, db, collection):
    try:
        mongo_host = os.environ.get(host, os.environ.get("MONGO_HOST", "localhost"))
        # Environment variables are strings; MongoClient needs an int port.
        mongo_port = int(os.environ.get(port, 27017))
        mongo_database = os.environ.get(db, "twitter_database")
        client = MongoClient(mongo_host, mongo_port)[mongo_database][collection]
        if collection == "twitter_collection-" + self.owner and client.count() == 0:
            raise Exception("There is no data in the source database: " + collection)
        return client
    except Exception as err:
        print("Error when connecting to SOURCE database: " + str(err))
        exit(2)
def print_verbs(fieldname, fltr, func=None, dbname=DB_NAME, collname=VERBS):
    match = MongoClient(LOCALHOST, PORT)[dbname][collname].find(fltr)
    total = match.count()
    if func:
        res = []
        count = counter(total)
        for entry in match:
            next(count)
            if func(entry):
                res.append(entry[fieldname])
        print("\n{} matching items".format(len(res)))
    else:
        print(total, "matching entries")
        res = [entry[fieldname] for entry in match]
    for item in res:
        print(item)
class TextIO:
    def __init__(self):
        self.db = MongoClient('localhost', 20000).get_database('chinese').get_collection('train')

    def get_mongo_size(self):
        size = self.db.count()
        # print("size: %d" % size)
        return size

    def get_text_from_mongo(self, skip=0, limit=1, isRandom=True):
        size = self.get_mongo_size()
        if isRandom:
            skip = random.randint(0, size - limit)
        cursor = self.db.find().skip(skip).limit(limit)
        for doc in cursor:
            yield doc['text']
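# Hedged usage sketch (assumption): pulls one random 'text' field from the
# chinese.train collection the class is hard-wired to (localhost:20000).
io = TextIO()
for text in io.get_text_from_mongo(limit=1, isRandom=True):
    print(text)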
def collect_duplicates():
    visited = set()
    duplicates = set()
    coll = MongoClient(LOCALHOST, PORT)[DB_NAME][VERBS]
    count = counter(coll.count())
    for entry in coll.find():
        next(count)
        verbs = entry[VERB]
        if isinstance(verbs, str):
            verbs = [verbs]
        for verb in verbs:
            if verb in visited:
                duplicates.add(verb)
            else:
                visited.add(verb)
    print("\nDumping {} duplicates".format(len(duplicates)))
    dump_utf_json(sorted(list(duplicates)), DUPLICATES_JSON)
class Mongodb:
    def __init__(self):
        self.collection = MongoClient()['db_name']['collection_name']

    def count(self):
        return self.collection.count()

    def find_page(self, pager, query=None):
        if pager.is_pre_half:
            result = list(
                self.collection.find(query).skip(pager.offset).limit(pager.page_size))
        else:
            result = list(self.collection.find(query)
                          .sort([('_id', -1)])
                          .skip(0 if pager.is_last else pager.residue)
                          .limit(min(pager.page_size,
                                     pager.residue if pager.is_last else pager.page_size)))[::-1]
        return result
def status(client, db, cell, example):
    '''
    \b
    - list cells, num_entries
    - verbose: find_one() in each cell but truncate sequence field before print
    - include .zoo metadata in the report in the future

    Example:

    \b
    zoo status --db diff --cell mock --example
    '''
    c = MongoClient(client)[db][cell]
    print(c.count(), 'documents.\n')
    if example:
        print('Example:')
        print(json.dumps(c.find_one(), indent=2))
        print()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("twitterUser", type=str, help="Twitter ID")
    parser.add_argument("-l", "--limit", type=int,
                        help="Limit of tweets to scrape; tweets are retrieved in batches of 20, default: 20",
                        default=20)
    args = parser.parse_args()

    mongo_host = os.environ.get("MONGO_HOST", "localhost")
    # Environment variables are strings; MongoClient needs an int port.
    mongo_port = int(os.environ.get("MONGO_PORT", 27017))
    mongo_database = os.environ.get("MONGO_TWITTER_DATABASE", "twitter_database")
    mongo_collection = "twitter_collection-" + args.twitterUser
    client = MongoClient(mongo_host, mongo_port)[mongo_database][mongo_collection]

    fetch({
        "twitterUser": args.twitterUser,
        "limit": args.limit,
        "mongoClient": client
    })
    # Possibly return the collection of tweets in a Python-readable format
    print("Number of tweets inserted in " + mongo_collection + ": " + str(client.count()))
class MongoKVStorage(KVStorage):
    def __init__(self, config):
        super().__init__(config)
        mongo_host = config['host']
        mongo_port = config['port']
        mongo_db_name = config['db']
        mongo_collection = config['collection']
        self._collection = MongoClient(host=mongo_host, port=mongo_port)[mongo_db_name][mongo_collection]

    def get(self, key):
        found_val = self._collection.find_one({'key': key})
        return found_val

    def set(self, key, value):
        self._collection.update_one({'key': key}, {'$set': value}, upsert=True)

    def exists(self, key):
        return self._collection.count({'key': key}) > 0
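# Hedged usage sketch (not from the source): config keys mirror __init__;
# note that set() spreads the fields of `value` via $set, so pass a dict.
store = MongoKVStorage({'host': 'localhost', 'port': 27017,
                        'db': 'kv', 'collection': 'entries'})
store.set('answer', {'value': 42})
print(store.exists('answer'))
print(store.get('answer'))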
def main():
    model = Doc2Vec.load(Settings.MODEL_PATH + "doc2vec.model")
    """test_similarity([("right", "wrong"), ("refresh", "cache")], model)"""
    reports_collection = MongoClient(Settings.MONGO_CONNECTION_STRING)[
        Settings.DOC2VEC_REPORTS_DATABASE][Settings.REPORTS_COLLECTION]
    duplicate_reports_collection = MongoClient(Settings.MONGO_CONNECTION_STRING)[
        Settings.DOC2VEC_REPORTS_DATABASE][Settings.DUPLICATE_REPORTS_COLLECTION]

    # get a random target text that has a duplicate
    index = random.randint(0, duplicate_reports_collection.count() - 1)
    txt = duplicate_reports_collection.find_one({"reId": index})["dups"][0]
    txt = normalize_text(txt)
    print "Target Text: %s" % txt
    vec = model.infer_vector(txt)

    # print most similar documents
    for reId, p in model.docvecs.most_similar([vec], topn=10):
        print "%0.3f: \"%s\"" % (p, reports_collection.find_one({"reId": reId})["text"])
import json, sys
import progressbar
from collections import defaultdict
from pymongo import MongoClient
from gazouilloire.web.export import format_csv

with open('config.json') as confile:
    conf = json.loads(confile.read())
db = MongoClient(conf['mongo']['host'], conf['mongo']['port'])[conf['mongo']['db']]['tweets']

langs = defaultdict(int)
query = {}

print "Counting matching results..."
count = db.count(query)

print "Querying and hashing results..."
bar = progressbar.ProgressBar(max_value=count)
for t in bar(db.find(query, limit=count, projection={"lang": 1, "_id": 0})):
    l = t.get("lang", "")
    langs[l] += 1

print "Sorting and storing csv data..."
with open("langs.csv", "w") as f:
    print >> f, "langs,count"
    bar = progressbar.ProgressBar(max_value=len(langs))
    for l, ct in bar(sorted(langs.items(), key=lambda x: -x[1])):
        print >> f, '%s,%s' % (l, ct)
if len(sys.argv) == 2:
    if '{' in sys.argv[1]:
        try:
            query = eval(sys.argv[1])
            if only_selected:
                query = {"$and": [query, {SELECTED_FIELD: True}]}
        except Exception as e:
            sys.stderr.write("WARNING: query wrongly formatted: %s\n" % sys.argv[1])
            sys.exit("%s: %s\n" % (type(e), e))
    elif os.path.exists(sys.argv[1]):
        with open(sys.argv[1]) as f:
            ids = sorted([t.get("id", t.get("_id")) for t in csv.DictReader(f)])
        if include_threads:
            ids = get_thread_ids_from_ids(ids, mongodb)
        query = {"_id": {"$in": ids}}
    else:
        query["text"] = re.compile(sys.argv[1].replace(' ', '\s+'), re.I)
elif len(sys.argv) > 2:
    query["$or"] = []
    for arg in sys.argv[1:]:
        query["$or"].append({"text": re.compile(arg.replace(' ', '\s+'), re.I)})

count = mongodb.count(query)
iterator = yield_csv(mongodb.find(query, sort=[("timestamp", 1)], limit=count),
                     extra_fields=EXTRA_FIELDS)
if verbose:
    import progressbar
    bar = progressbar.ProgressBar(max_value=count)
    iterator = bar(iterator)
for t in iterator:
    print t
parser.add_argument('--fastrun', dest='fastrun', action='store_true')
parser.add_argument('--no-fastrun', dest='fastrun', action='store_false')
parser.set_defaults(fastrun=True)
args = parser.parse_args()

log_dir = args.log_dir if args.log_dir else "./logs"
run_id = datetime.now().strftime('%Y%m%d_%H:%M')
__metadata__['run_id'] = run_id
taxon = args.taxon
fast_run = args.fastrun
coll = MongoClient(args.mongo_uri)[args.mongo_db]["mygene"]

# get metadata about sources
# this should be stored in the same db under the collection: mygene_sources
metadata_coll = MongoClient(args.mongo_uri)[args.mongo_db]["mygene_sources"]
assert metadata_coll.count() == 1
metadata = metadata_coll.find_one()

log_name = '{}-{}.log'.format(__metadata__['name'], run_id)
if wdi_core.WDItemEngine.logger is not None:
    wdi_core.WDItemEngine.logger.handles = []
wdi_core.WDItemEngine.setup_logging(log_dir=log_dir, log_name=log_name,
                                    header=json.dumps(__metadata__),
                                    logger_name='gene{}'.format(taxon))

if "microbe" in taxon:
    microbe_taxa = get_all_taxa()
    taxon = taxon.replace("microbe", ','.join(map(str, microbe_taxa)))

for taxon1 in taxon.split(","):
from __future__ import division
from pymongo import MongoClient
from settings import Settings
import operator

# Initialize all collections
userscore_collection = MongoClient(Settings.MONGO_CONNECTION_STRING)[Settings.TOPICS_DATABASE][Settings.USERSCORE_COLLECTION]
businessscore_collection = MongoClient(Settings.MONGO_CONNECTION_STRING)[Settings.TOPICS_DATABASE][Settings.BUSINESSSCORE_COLLECTION]
reco_collection = MongoClient(Settings.MONGO_CONNECTION_STRING)[Settings.TOPICS_DATABASE][Settings.RECOMMENDATION_COLLECTION]

# Go through each user and compute the top 20 businesses for each user
print userscore_collection.count()
print businessscore_collection.count()

userScoreCollection = userscore_collection.find()
bulk = reco_collection.initialize_unordered_bulk_op()
counter = 0
bulkCounter = 0
for user in userScoreCollection:
    userTopics = user["userscore"]
    # print "length of user topics " + str(len(userTopics))
    ratings = {}
    businessScoreCollection = businessscore_collection.find()
    for business in businessScoreCollection:
        businessTopics = business["businessscore"]
        # print "length of Business topics " + str(len(businessTopics))
                    type=str)
parser.add_argument('protdbcoll_name', help='MongoDB ProtDB Collection name', type=str)
parser.add_argument('--host', help='MongoDB host (mongod or mongos)', type=str)
parser.add_argument('--port', help='MongoDB port (mongod or mongos)', type=int)
args = parser.parse_args()

if args.host:
    host = args.host
else:
    host = 'localhost'
if args.port:
    port = args.port
else:
    port = 27017

ProtColl = MongoClient(host, port)[args.protdb_name][args.protdbcoll_name]
half = int(ProtColl.count() / 2)
#half = 82817736  # in indexDB / ComPIL

f = sys.stdin
for protID in f:
    protID = int(protID)
    print(protID)
    if protID <= half:
        print(protID + half)
    elif protID > half:
        print(protID - half)
import json
from pymongo import MongoClient

try:
    with open(os.path.join(os.path.dirname(__file__), '..', 'config.json')) as confile:
        conf = json.loads(confile.read())
except Exception as e:
    sys.stderr.write("ERROR: Impossible to read config.json: %s %s\n" % (type(e), e))
    exit(1)
try:
    db = MongoClient(conf['mongo']['host'], conf['mongo']['port'])[conf['mongo']['db']]['links']
except Exception as e:
    sys.stderr.write("ERROR: Could not initiate connection to MongoDB: %s %s\n" % (type(e), e))
    exit(1)

verbose = True
if len(sys.argv) > 1 and "--quiet" in sys.argv:
    sys.argv.remove("--quiet")
    verbose = False

count = db.count()
iterator = db.find()
if verbose:
    import progressbar
    bar = progressbar.ProgressBar(max_value=count)
    iterator = bar(iterator)

print "url\tresolved"
for t in iterator:
    print ('%s\t%s' % (t["_id"], t["real"])).encode('utf-8')
def get_data(table="cmnt"): ''' table: table (collection) ''' limit = request.args.get("limit", 10, type=int) page = request.args.get("page", 1, type=int) _db = MongoClient().safe_protocol[table] # data = _db.find().sort("time", -1).skip(limit * (page - 1)).limit(limit) if table == "alert": data_list = [] alerts = _db.find().sort("time", -1).skip(limit * (page - 1)).limit(limit) total = _db.count() for alert in alerts: alert = { 'time': alert.get('time'), 'protocol_type': alert.get('type'), 'message': alert.get('message') } data_list.append(alert) protocol_type = request.args.get("type") if protocol_type: data_list = list( filter(lambda x: x['protocol_type'] == protocol_type, data_list)) total = len(data_list) return {'data': data_list, 'total': total} elif table == "user": data_list = [] users = _db.find().sort("create_time", -1).skip(limit * (page - 1)).limit(limit) total = _db.count() for user in users: data_list.append({ 'user_id': user.get('_id'), 'username': user.get('name'), 'level': user.get('level'), 'create_time': user.get('create_time') }) return {'data': data_list, 'total': total} elif table == "oper": data_list = [] opers = _db.find().sort("time", -1).skip(limit * (page - 1)).limit(limit) total = _db.count() for oper in opers: oper = { 'user_id': oper.get('user_id'), 'username': oper.get('user_name'), 'time': oper.get('time'), 'protocol_type': oper.get('protocol_type'), 'oper': oper.get('oper') } data_list.append(oper) return {'data': data_list, 'total': total} elif table == "cmnt": data_list = [] cmnts = _db.find().sort("time", -1).skip(limit * (page - 1)).limit(limit) total = _db.count() for cmnt in cmnts: cmnt = { 'time': cmnt.get('time'), 'buffer': cmnt.get('buffer'), 'ip': cmnt.get('ip') } data_list.append(cmnt) return {'data': data_list, 'total': total}
class Stat:
    '''
    This class is used for
    1. Putting stat_list corresponding to a specific timestamp
    2. Saving in mongoDB
    '''
    def __init__(self, config_file):
        self.docker = dockerGuest(config_file)
        self.config_file = config_file
        self.collection = MongoClient()['test'][self.get_collection()]  # connecting to mongodb

    def get_collection(self):
        '''
        Getting the mongodb collection (table) name from the config file.
        (This method might be removed later...)
        '''
        dir_path = os.path.dirname(os.path.realpath(__file__))
        file_path = os.path.join(dir_path, self.config_file)
        return json.load(open(file_path))["collection"]

    def get_config_file(self):
        return self.config_file

    @staticmethod
    def get_time():
        '''Getting the current timestamp.

        Storing a python datetime object in mongodb. This might be changed later.
        '''
        time = datetime.now()
        return time

    def data_to_save(self):
        '''
        Formats data to be saved in mongodb.
        JSON: { time: timestamp, stats: [{container1 stat}, {container2 stat} ...] }
        '''
        stats = self.docker.get_stats()
        if stats is None:  # no stats means there are no containers
            return None
        global_stat_dict = {}
        global_stat_dict["time"] = Stat.get_time()
        global_stat_dict["stats"] = stats
        return global_stat_dict

    def save(self, cap=60):
        '''Saving in DB'''
        new_doc = self.data_to_save()
        if self.collection.count() == cap:
            # If there are already `cap` items in the db, delete the data with
            # the oldest timestamp before inserting the new item. We cannot
            # simply delete any item because insertion order in a mongodb
            # collection cannot be relied upon.
            top_doc_time = min(doc['time'] for doc in self.collection.find())
            self.collection.delete_one({'time': top_doc_time})
            logger.info("Deleted timestamp is...{}".format(top_doc_time))
        self.collection.insert_one(new_doc)
        logger.info("Saved in DB...{}".format(new_doc["time"]))

    def save_data(self):
        '''method not used'''
        data = self.data_to_save()
        if data is not None:
            if self.is_db_full():
                self.make_space_db()
            self.collection.insert_one(data)
            logger.info('Saved in DB...')
import unittest.mock
class MongodbUtil(object):
    """
    - MYSQL_PASSWD must be set in .bashrc or .bash_profile.
    """

    def __init__(self, mongo_url, db_name, collection_name, auto_connect=False):
        """
        :param mongo_url: host, port, username, password, auth db
        :param db_name: database name
        :param collection_name: collection name
        :param auto_connect: default do not connect for multiprocessing
            (http://api.mongodb.com/python/current/faq.html#using-pymongo-with-multiprocessing)
        """
        self.mongo_url = mongo_url
        self.db_name = db_name
        self.collection_name = collection_name
        self.auto_connect = auto_connect
        self.collection = MongoClient(mongo_url, socketKeepAlive=True,
                                      connect=auto_connect)[db_name][collection_name]

    def __repr__(self):
        return '%s (db_name:%s, collection_name:%s, auto_connect:%s)' % (
            StringUtil.mask_passwd_in_url(self.mongo_url), self.db_name,
            self.collection_name, self.auto_connect)

    def __str__(self):
        return self.__repr__()

    def find(self, query=None, sort=None, limit=0):
        if query is None:
            query = {}
        if sort is None:
            sort = [('_id', ASCENDING)]
        for row in self.collection.find(query, no_cursor_timeout=True).sort(sort).limit(limit):
            yield row

    def count(self, query=None):
        if query is None:
            query = {}
        # count() takes only a filter; no_cursor_timeout applies to cursors, not counts.
        return self.collection.count(query)

    def find_one(self, query: dict) -> dict:
        # find_one() returns a dict (or None), so it cannot be chained with .limit().
        return self.collection.find_one(query)

    def create_index(self, field_list=None, unique=False):
        if field_list is None:
            field_list = []
        for field in field_list:
            self.collection.create_index([(field, ASCENDING)], background=True, unique=unique)
        return

    def insert(self, row: dict):
        return self.collection.insert_one(row)

    def update_one(self, where_query: dict, update_content: dict, upsert=False):
        return self.collection.update_one(where_query, update_content, upsert=upsert)

    def update(self, where_query: dict, update_content: dict, upsert=False):
        return self.collection.update_many(where_query, update_content, upsert=upsert)

    def save(self, row):
        return self.collection.save(row)

    def delete(self, where_query: dict):
        result = self.collection.delete_one(where_query)
        if result:
            return result.deleted_count
        return 0

    def drop(self):
        return self.collection.drop()
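# Hedged usage sketch (assumption): url, db and collection names below are
# placeholders; auto_connect stays False for multiprocessing safety.
util = MongodbUtil('mongodb://localhost:27017', 'mydb', 'mycoll')
util.insert({'k': 1})
print(util.count({'k': 1}))
for row in util.find({'k': 1}, limit=5):
    print(row)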
    reviews = corpus_collection.find({'business_id': rest['_id']})
    if reviews.count() > 9:
        print i
        list.append((i, reviews.count()))
        f.write(str(i))
        f.write('\t')
        f.write(str(reviews.count()))
        f.write('\n')
print len(list)
'''
a = [4.5, 4.3, 5]
b = [3.1, 4.3, 5]
rest_rating = []
print 'Cor: '
pr = pearsonr(a, b)[0]
pr = round(pr * 100, 2)
print '%r %%' % pr
print restaurant_cursor.count()
print corpus_collection.count()
for i in range(2):
    print i
    rest = restaurant_cursor[i]
    reviews = corpus_collection.find({'business_id': rest['_id']})
    #print (reviews.count())
    #print ("Restaurant : %s" % rest['name'])
    #print ("Restaurant stars: %s" % rest['stars'])
    rest_rating.append(rest['stars'])
print (rest_rating)
import json, sys
import progressbar
from pymongo import MongoClient
from gazouilloire.web.export import format_csv

with open('config.json') as confile:
    conf = json.loads(confile.read())
db = MongoClient(conf['mongo']['host'], conf['mongo']['port'])[conf['mongo']['db']]['tweets']

urls = {}
query = {}
#query["langs"] = "fr"

print "Counting matching results..."
count = db.count(query)

print "Querying and hashing results..."
bar = progressbar.ProgressBar(max_value=count)
for t in bar(db.find(query, limit=count, projection={"links": 1, "proper_links": 1})):
    for l in t.get("proper_links", t["links"]):
        if l not in urls:
            urls[l] = 0
        urls[l] += 1

print "Sorting and storing csv data..."
with open("shared_urls.csv", "w") as f:
    print >> f, "url,shares"
    bar = progressbar.ProgressBar(max_value=len(urls))
    for link, shares in bar(sorted(urls.items(), key=lambda x: -x[1])):
        print >> f, '%s,%s' % (format_csv(link), shares)
    elif os.path.exists(sys.argv[1]):
        with open(sys.argv[1]) as f:
            ids = sorted([t.get("id", t.get("_id")) for t in csv.DictReader(f)])
        if include_threads:
            ids = get_thread_ids_from_ids(ids, mongodb)
        query = {"_id": {"$in": ids}}
    else:
        query["text"] = re.compile(sys.argv[1].replace(' ', '\s+'), re.I)
elif len(sys.argv) > 2:
    query["$or"] = []
    for arg in sys.argv[1:]:
        query["$or"].append({"text": re.compile(arg.replace(' ', '\s+'), re.I)})

if limit:
    total = limit
elif count and verbose:
    total = mongodb.count(query)
else:
    total = mongodb.count()

iterator = yield_csv(mongodb.find(query, sort=[("timestamp", 1)], limit=total),
                     extra_fields=EXTRA_FIELDS)
if verbose:
    import progressbar
    bar = progressbar.ProgressBar(max_value=total)
    iterator = bar(iterator)
for t in iterator:
    print t
    with open(os.path.join(os.path.dirname(__file__), '..', 'config.json')) as confile:
        conf = json.loads(confile.read())
except Exception as e:
    sys.stderr.write("ERROR: Impossible to read config.json: %s %s" % (type(e), e))
    exit(1)
try:
    mongodb = MongoClient(conf['mongo']['host'], conf['mongo']['port'])[conf['mongo']['db']]['tweets']
except Exception as e:
    sys.stderr.write("ERROR: Could not initiate connection to MongoDB: %s %s" % (type(e), e))
    exit(1)

verbose = True
if len(sys.argv) > 1 and "--quiet" in sys.argv:
    sys.argv.remove("--quiet")
    verbose = False

print "id"
iterator = mongodb.find(projection=["_id"])
if verbose:
    import progressbar
    count = mongodb.count()
    bar = progressbar.ProgressBar(max_value=count)
    iterator = bar(iterator)
for t in iterator:
    print t["_id"]
from pymongo import MongoClient
import time

# Create the client once instead of opening a new connection on every loop.
client = MongoClient()['spider']['1024']
while True:
    print(client.count())
    time.sleep(2)
import os
import json
from random import randint

from flask import Flask, jsonify, request

from fighter import Arena, Fighter
from pymongo import MongoClient

app = Flask(__name__)
arena = Arena(8)
repos = MongoClient().githunt.repos
repos_count = repos.count()
fights = MongoClient().gitfighter.fights


def get_full_name(repo):
    return '%s/%s' % (repo['owner']['login'], repo['name'])


@app.route("/arena")
def arena_json():
    print(arena.json)
    return jsonify(arena.json)


@app.route("/fight", methods=['POST'])
def fight():
    fighters_stats = request.json
    print fighters_stats
    arena.set_fighters(fighters_stats)
    arena.start()
    fight = {"log": arena.log}
    response = jsonify(fight)