class DaoSunPosition():

    def __init__(self):
        self.col = Connection()['rdam']['sunpos']
        self.bulk = []

    def create_datetime_index(self):
        self.col.create_index('datetime')

    def persist(self, sunpos):
        self.bulk.append(sunpos)

    def flush(self):
        # Guard against inserting an empty batch, which pymongo rejects
        if self.bulk:
            self.col.insert(self.bulk)
            self.bulk = []

    def find_within_time(self, start_date, end_date):
        result = []
        for bson in self.col.find({'$and': [
                {'datetime': {'$gt': start_date}},
                {'datetime': {'$lt': end_date}}]}):
            sunpos = SunPosition(bson['az'], bson['el'], bson['datetime'])
            result.append(sunpos)
        return result
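# Hypothetical usage sketch for DaoSunPosition, not from the original source.
# It assumes persist() receives BSON-ready dicts carrying 'az', 'el' and
# 'datetime' keys, matching what find_within_time() reads back, and that a
# SunPosition class is defined elsewhere in this module.
if __name__ == '__main__':
    from datetime import datetime

    dao = DaoSunPosition()
    dao.create_datetime_index()
    dao.persist({'az': 180.0, 'el': 45.0, 'datetime': datetime(2012, 6, 21, 12, 0)})
    dao.flush()
    positions = dao.find_within_time(datetime(2012, 6, 21), datetime(2012, 6, 22))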
def save(nmap, **kwargs):
    logger = save.get_logger()
    nmapTask = Connection().phoenorama.nmapTask
    nmapTask.insert(nmap.toJSON())
    logger.info("Nmap Task was successfully saved")
def saveCompromise(request):
    try:
        #user = "******"
        #send_mail(EMAIL_SUBJECT_CREATE, (EMAIL_TEXT_CREATE % "http://ya.ru/"), EMAIL_HOST_USER, [user])
        currentCompromise = request.POST.get("json")
        currentCompromise = json.loads(currentCompromise)
        mongoConnection = Connection(host="127.0.0.1", port=27017)["compDB"]["compromiseCollection"]
        users = currentCompromise.get("users", [])
        if not users:
            users = ['*****@*****.**', '*****@*****.**', "*****@*****.**"]
        currentCompromise["type"] = "event"
        recordId = mongoConnection.insert(currentCompromise)
        for user in users:
            uniqDesc = md5(user + str(recordId)).hexdigest()
            uniqUrl = ANSWER_URL + uniqDesc
            mongoConnection.insert({"uniqDesc": uniqDesc, "idEvent": str(recordId), 'mail': user})
            send_mail(EMAIL_SUBJECT_CREATE, (EMAIL_TEXT_CREATE % uniqUrl), EMAIL_HOST_USER, [user])
        # Quote the URL so the response is valid JSON (uniqUrl holds the
        # last user's URL after the loop)
        return HttpResponse('{"status": "ok", "url": "%s"}' % uniqUrl)
    except TypeError:
        return HttpResponse("bad json")
def run(nmap, **kwargs):
    '''Start nmap task'''
    logger = run.get_logger()
    # Update nmap task status
    __updateNmap(nmap, {'status': "RUNNING"})
    start_task = "%s" % (nmap.target)
    cmd = shlex.split(TOOL_PATH + start_task)
    report_xml = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()[0]
    #TODO: Validate start_task status

    # Save report
    report = parse(StringIO(report_xml))
    nmapReport = Connection().phoenorama.nmapReport
    nmapReport.insert(report.toJSON())

    # Update nmap task status and report_uuid
    __updateNmap(nmap, {'status': "DONE", 'report_uuid': report.report_uuid})
    logger.info("Report id: %s was successfully generated and saved to DB" % report.report_uuid)
    return report.report_uuid
def save(openvas, **kwargs):
    logger = save.get_logger()
    openvasTask = Connection().phoenorama.openvasTask
    openvasTask.insert(openvas.toJSON())
    logger.info("Openvas Task was successfully saved")
class CustomStreamListener(tweepy.StreamListener):

    def __init__(self):
        self.tweets = Connection().mytimeline.tweets
        super(CustomStreamListener, self).__init__()

    def on_data(self, data):
        self.tweets.insert(json.loads(data))
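# Hedged usage sketch for CustomStreamListener, assuming the pre-4.0 tweepy
# streaming API; the OAuth credentials and track term are placeholders.
auth = tweepy.OAuthHandler('CONSUMER_KEY', 'CONSUMER_SECRET')
auth.set_access_token('ACCESS_TOKEN', 'ACCESS_SECRET')
stream = tweepy.Stream(auth, CustomStreamListener())
stream.filter(track=['python'])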
def update_min_max_sum(entity):
    '''Used to insert min, max and sum of each mention feature.'''
    collection = Connection().jd['train_%s_mentions' % entity]
    collection_user = Connection().jd['train_%ss' % entity]
    mentions = [line[:-1].decode('utf8')
                for line in open('../features/mention.feature')]
    min_d = dict()
    max_d = dict()
    sum_d = dict()
    sum_u_d = dict()
    for m in mentions:
        min_d[m] = float('inf')
        max_d[m] = -1
        sum_d[m] = 0
        sum_u_d[m] = 0
    for user in collection_user.find():
        for m in user['mentions']:
            v = user['mentions'][m]
            if v < min_d[m]:
                min_d[m] = v
            if v > max_d[m]:
                max_d[m] = v
            sum_d[m] += v
            sum_u_d[m] += 1
    for m in mentions:
        collection.insert({
            '_id': m,
            'distribute': [min_d[m], max_d[m], sum_d[m], sum_u_d[m]]
        })
def update_all():
    tags = Connection()["reddit"]["tags"]
    index = Connection()["reddit"]["inverted_index"]
    invalid = ['.', '$']
    for tag in tags.find():
        for key in tag.keys():
            if key != "_id":
                word_list = tag[key]
                for w in word_list:
                    # Strip characters MongoDB does not allow in keys
                    for i in invalid:
                        if i in w:
                            w = w.replace(i, '')
                    row = index.find_one({"key": w})
                    if not row:
                        index.insert({"key": w, "ids": [key]})
                    else:
                        print "Updating", w
                        print row, row["ids"]
                        lst = list(row["ids"])
                        print lst, key
                        lst.append(key)
                        new_row = {"key": w, "ids": lst}
                        print new_row
                        index.update({"key": w}, new_row)
class MongoZipEmitter(Emitter):

    def __init__(self):
        super(MongoZipEmitter, self).__init__()
        self._zipcodes = Connection()[settings.MONGO_DATABASE]['zipcodes']
        self._zipcodes.drop()

    def emit_record(self, record):
        self._zipcodes.insert(record)
def __saveReport(report_uuid):
    getReport_task = "--get-report %s" % (report_uuid)
    cmd = shlex.split(TOOL_PATH + getReport_task)
    report_xml = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()[0]
    #@TODO: check if output result is valid
    report = parse(StringIO(report_xml))
    openvasReport = Connection().phoenorama.openvasReport
    openvasReport.insert(report.toJSON())
    return "Report was successfully generated and saved to DB"
def construct_test_user():
    all_products = get_all_ids_from_file('product')
    collection = Connection().jd.test_users
    collection.drop()
    linked_users = Connection().jd.weibo_users
    fname = RAW_DATA_DIR + 'test_user_review.data'
    uids_with_kids = [line[:-1]
                      for line in open(RAW_DATA_DIR + 'uids_with_kids.data')]
    uids_without_kids = [line[:-1]
                         for line in open(RAW_DATA_DIR + 'uids_without_kids.data')]
    linked_uids = dict([(line[:-1].split(' ')[1], line[:-1].split(' ')[0])
                        for line in open(RAW_DATA_DIR + 'linked_uids.data')])
    # Words that leak the label: baby, daughter, son, boyfriend, girlfriend
    prone_words = ['宝宝', '女儿', '儿子', '男朋友', '女朋友']
    f = open(fname)
    count = int(f.readline()[:-1])
    bar = progress_bar(count)
    for i in xrange(count):
        uid = f.readline()[:-1]
        products = f.readline()[:-1].split(' ')
        products = list(set(products) & all_products)
        mentions = count_mentions(f.readline())
        profile = {
            'gender': [0] * 2,
            'age': [0] * 2,
            'location': [0] * 2,
            'kids': [0] * 2,
        }
        if uid in linked_uids:
            user = linked_users.find_one({'_id': linked_uids[uid]})
            if user is not None:
                profile['gender'] = user['profile']['gender']
                profile['age'] = user['profile']['age']
                profile['location'] = user['profile']['location']
        if uid in uids_with_kids:
            profile['kids'] = [0, 1]
        if uid in uids_without_kids:
            profile['kids'] = [1, 0]
        if uid in uids_without_kids or uid in uids_with_kids:
            for w in prone_words:
                if w in mentions:
                    mentions.pop(w)
        collection.insert({
            '_id': uid,
            'products': products,
            'mentions': mentions,
            'profile': profile
        })
        bar.draw(i + 1)
def test_queue_object_can_attach_to_a_collection_that_already_exists(self):
    collection = Connection().karait_test.queue_test
    collection.insert({
        'routing_key': 'foobar',
        'message': {
            'apple': 3,
            'banana': 5
        },
        'timestamp': 2523939,
        'expire': 20393
    })
    queue = Queue(database='karait_test', queue='queue_test')
    self.assertEqual(1, collection.find({}).count())
class GameHandler(BaseHandler):
    allowed_methods = ('GET', 'POST')

    def __init__(self):
        self.games = Connection().dnb.games

    def read(self, request, id):
        return list(self.games.find())

    def create(self, request, id):
        if request.data:
            request.data['time'] = time()
            self.games.insert(request.data)
            return rc.CREATED
def construct_train_product():
    all_users = get_all_ids_from_file('user') | get_all_ids_from_file('test_user')
    collection = Connection().jd.train_products
    fname = RAW_DATA_DIR + 'product_review.data'
    f = open(fname)
    count = int(f.readline()[:-1])
    bar = progress_bar(count)
    for i in xrange(count):
        pid = f.readline()[:-1]
        users = f.readline()[:-1].split(' ')
        users = list(set(users) & all_users)
        mentions = count_mentions(f.readline())
        collection.insert({'_id': pid, 'users': users, 'mentions': mentions})
        bar.draw(i + 1)
def addAnswer(request):
    curAnswer = request.POST.get("json")
    curAnswer = json.loads(curAnswer)
    compromises = Connection(host="127.0.0.1", port=27017)["compDB"]["compromiseCollection"]
    curRecord = compromises.find_one({"_id": ObjectId(curAnswer["_id"])})
    del curAnswer["_id"]
    curAnswer["type"] = "answer"
    curAnswer["compromise_id"] = curRecord["_id"]
    answers = Connection(host="127.0.0.1", port=27017)["compDB"]["answers"]
    answers.insert(curAnswer)
    # Drop the ObjectId so the record is JSON-serializable
    del curRecord["_id"]
    return HttpResponse(json.dumps(curRecord))
def test_queue_object_can_attach_to_a_collection_that_already_exists(self):
    collection = Connection().capsulemq_test.queue_test
    collection.insert({
        'routing_key': 'foobar',
        'message': {
            'apple': 3,
            'banana': 5
        },
        'timestamp': 2523939,
        'expire': 20393
    })
    queue = Queue(database='capsulemq_test', queue='queue_test')
    self.assertEqual(1, collection.find({}).count())
class Application(tornado.web.Application):

    def __init__(self, handlers, **settings):
        tornado.web.Application.__init__(self, handlers, **settings)
        self.collection = Connection().vapour.urls
        self.templates = TemplateLookup(directories=["templates"])

    def get_link_by_id(self, id):
        record = self.collection.find_one({'_id': uuid.UUID(id)})
        return fix_id(record)

    def get_links_by_tag(self, tag):
        records = self.collection.find({'tags': re.compile(tag, re.I)})
        return fix_ids(records)

    def get_links_by_url(self, url):
        records = self.collection.find({'url': re.compile(url, re.I)})
        return fix_ids(records)

    def insert_link(self, url, desc, tags):
        return self.collection.insert({
            '_id': uuid.uuid4(),
            'url': url,
            'desc': desc,
            'tags': tags,
            'added': datetime.datetime.utcnow()
        })
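# Minimal sketch of wiring the Application into a Tornado server; the empty
# handler list, example link, and port are illustrative, not from the
# original source.
if __name__ == '__main__':
    import tornado.ioloop

    app = Application(handlers=[])  # real handlers are defined elsewhere
    app.insert_link('http://example.com', 'an example link', ['example'])
    app.listen(8888)
    tornado.ioloop.IOLoop.instance().start()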
def extract(data):
    collection = Connection()['conf']['b']
    for post in data:
        logger = {}
        try:
            confID = getID(post["message"])
        except KeyError as k:
            print 'Error', k
            continue
        logger["confID"] = confID
        logger["message"] = post["message"]
        cursor = collection.find({'confID': str(confID)})
        if cursor.count() != 0:
            continue
        collection.insert(logger, safe=True)
        print 'Inserted', confID
def split(self):
    newc = Connection().aivb_redux.dater
    for x in self.search('period', 'date_range', True, 'subject_count', 1):
        newc.insert(x)
    self.log.mlog.info("New database was created!")
def test_calling_delete_removes_the_message_from_mongodb(self):
    collection = Connection().karait_test.queue_test
    collection.insert({
        'routing_key': 'foobar',
        'apple': 3,
        'banana': 5,
        '_meta': {
            'timestamp': 2523939,
            'expire': 20393,
            'expired': False
        }
    })
    raw_message = collection.find_one({'_meta.expired': False})
    self.assertEqual(3, raw_message['apple'])
    message = Message(raw_message, queue_collection=collection)
    message.delete()
    self.assertEqual(0, collection.find({'_meta.expired': False}).count())
def test_calling_delete_removes_the_message_from_mongodb(self):
    collection = Connection().capsulemq_test.queue_test
    collection.insert({
        'routing_key': 'foobar',
        'apple': 3,
        'banana': 5,
        '_meta': {
            'timestamp': 2523939,
            'expire': 20393,
            'expired': False
        }
    })
    raw_message = collection.find_one({'_meta.expired': False})
    self.assertEqual(3, raw_message['apple'])
    message = Message(raw_message, queue_collection=collection)
    message.delete()
    self.assertEqual(0, collection.find({'_meta.expired': False}).count())
def update_index(id, words):
    index = Connection()["reddit"]["inverted_index"]
    for w in words:
        # 'invalid' is expected at module level (e.g. ['.', '$'] as in
        # update_all); MongoDB does not allow these characters in keys
        for i in invalid:
            if i in w:
                w = w.replace(i, '')
        row = index.find_one({"key": w})
        if not row:
            index.insert({"key": w, "ids": [id]})
        else:
            lst = list(row["ids"])
            if id not in lst:
                lst.append(id)
                new_row = {"key": w, "ids": lst}
                index.update({"key": w}, new_row)
def construct_train_user():
    from pyltp import Segmentor
    all_products = get_all_ids_from_file('product')
    collection = Connection().jd.train_users
    fname = RAW_DATA_DIR + 'user_review.data'
    f = open(fname)
    count = int(f.readline()[:-1])
    print count
    bar = progress_bar(count)
    for i in xrange(count):
        uid = f.readline()[:-1]
        products = f.readline()[:-1].split(' ')
        products = list(set(products) & all_products)
        mentions = count_mentions(f.readline())
        collection.insert({
            '_id': uid,
            'products': products,
            'mentions': mentions
        })
        bar.draw(i + 1)
class MongoLogWriter(object):

    def __init__(self, db_name, collection_name, host="localhost", port=27017):
        self.collection = Connection(host, port)[db_name][collection_name]

    def parse_and_write(self, content):
        log_entries = content.strip("\n").split("\n")
        log_entries = filter(None, log_entries)
        map(self.parse_and_write_single_log_entry, log_entries)

    def parse_and_write_single_log_entry(self, log_entry):
        timestamp, uid, url, status, response_time, response_length = log_entry.split("|")
        log_dt = datetime.fromtimestamp(float(timestamp))
        self.collection.insert({
            "datetime": log_dt,
            "uid": uid if uid != "-" else None,
            "status": int(status),
            "response_time": float(response_time),
            "response_length": int(response_length),
        })
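# Illustrative use of MongoLogWriter; the database/collection names and the
# pipe-separated sample line are assumptions, with fields in the order
# parse_and_write_single_log_entry() expects
# (timestamp|uid|url|status|response_time|response_length).
writer = MongoLogWriter('logs', 'access')
writer.parse_and_write("1335312000.0|user42|/index.html|200|0.127|5120\n")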
class DaoShadow():

    def __init__(self):
        self.col = Connection()['rdam']['shadow']
        self.bulk = []

    def create_index(self):
        self.col.create_index([('loc', GEO2D), ('date_time', 1)])

    def persist(self, shadow):
        bson = self.shadow_to_bson(shadow)
        self.bulk.append(bson)
        if len(self.bulk) >= 10:
            self.flush()

    def flush(self):
        # Guard against inserting an empty batch, which pymongo rejects
        if self.bulk:
            self.col.insert(self.bulk)
            self.bulk = []

    def shadow_to_bson(self, shadow):
        bson = {'loc': {'lat': shadow.lat, 'lon': shadow.lon},
                'date_time': shadow.date_time}
        return bson
class MongoDB(Database):
    """ MongoDB abstraction. """

    def __init__(self, mongodburi):
        self._description = 'mongo'
        db_name = mongodburi[mongodburi.rfind('/') + 1:]
        self._mongo = Connection(mongodburi)[db_name][COLLECTION]

    def __contains__(self, uid):
        return self._mongo.find_one(uid) is not None

    def get(self, uid):
        entry = self._mongo.find_one(uid)
        return zlib.decompress(entry['code']) if entry is not None else None

    def put(self, code, uid=None):
        if uid is None:
            uid = Database.hash_(code)
        if uid not in self:
            self._mongo.insert(Database.make_ds(uid, Binary(zlib.compress(code))),
                               safe=True)
        return uid
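# Hedged roundtrip sketch for the MongoDB wrapper; the URI is a placeholder,
# and the Database base class (hash_, make_ds) plus the COLLECTION constant
# are assumed to come from the surrounding module.
db = MongoDB('mongodb://localhost:27017/pastes')
uid = db.put('print "hello"')
assert db.get(uid) == 'print "hello"'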
network_config = [2, 2]
robot = robotics.EnergyRobot(components[0], None, 'robot_0', '127.0.0.1')
population = evolution.DifferentialEvolution(F, CR, NP, neural.EvolvableFFANN,
                                             network_config, logistic)

for iteration in range(iterations):
    population.recombine()
    info = []
    for i, (current, candidate) in enumerate(population):
        fitness = []
        robot.network = candidate
        for j in range(trials):
            robot.move(steps, interval)
            fitness.append(robot.energy)
        candidate.fitness = mean(fitness)
        msg = '> iteration {0}, individual {1}, fitness {2} (previous {3})'
        print msg.format(iteration, i, candidate.fitness, current.fitness)
        info.append({'iteration': iteration, 'robot': i,
                     'fitness': current.fitness, 'weights': current.weights})
    population.select()
    # 'collection' is expected to be a pymongo collection defined earlier
    # in the script
    collection.insert(info)
class DaoHeight():

    def __init__(self, scale=1):
        self.col = Connection()['rdam']['height']
        self.bulk = []
        # (Py)mongo's 2D index can't handle large x and y values, so scale
        # can be used to store smaller xs and ys. Scale is a power of 10:
        # scale = 3 means divide coordinates by 1000.
        self.scale = scale

    def create_geo_index(self, min=0, max=1000):
        self.col.create_index([('loc', GEO2D)], min=min, max=max)

    def persist(self, point):
        bson = self.point_to_bson(point)
        self.bulk.append(bson)
        if len(self.bulk) >= 50000:
            self.flush()

    def flush(self):
        if len(self.bulk) > 0:
            self.col.insert(self.bulk)
            print len(self.bulk)
            self.bulk = []

    def find_within_box(self, points):
        box = self.points_to_box(points)
        result = []
        for bson in self.col.find({'loc': {'$within': {'$box': box}}}):
            point = self.bson_to_point(bson)
            result.append(point)
        return result

    def find_within_polygon(self, points):
        polygon = self.points_to_polygon_bson(points)
        result = []
        for bson in self.col.find({'loc': {'$within': {'$polygon': polygon}}}):
            point = self.bson_to_point(bson)
            result.append(point)
        return result

    def points_to_box(self, points):
        p1x = points[0].x / math.pow(10, self.scale)
        p1y = points[0].y / math.pow(10, self.scale)
        p2x = points[1].x / math.pow(10, self.scale)
        p2y = points[1].y / math.pow(10, self.scale)
        box = {'a': {'x': p1x, 'y': p1y}, 'b': {'x': p2x, 'y': p2y}}
        return box

    def points_to_polygon_bson(self, points):
        result = []
        for point in points:
            bson = self.point_to_polygon_bson(point)
            result.append(bson)
        return result

    def point_to_polygon_bson(self, point):
        x = point.x / math.pow(10, self.scale)
        y = point.y / math.pow(10, self.scale)
        return {'x': x, 'y': y}

    def points_to_bson(self, points):
        result = []
        for point in points:
            bson = self.point_to_bson(point)
            result.append(bson)
        return result

    def point_to_bson(self, point):
        x = point.x / math.pow(10, self.scale)
        y = point.y / math.pow(10, self.scale)
        z = point.z
        return {'loc': {'x': x, 'y': y}, 'z': z}

    def bson_to_point(self, bson):
        x = bson['loc']['x'] * math.pow(10, self.scale)
        y = bson['loc']['y'] * math.pow(10, self.scale)
        z = bson['z']
        return Point(x, y, z)
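# Hypothetical usage of DaoHeight with scale=3, so coordinates around
# 100000 are stored as 100.0 and fall inside the default index range;
# Point is assumed to be the (x, y, z) class used elsewhere in this module.
dao = DaoHeight(scale=3)
dao.create_geo_index()
dao.persist(Point(100000.0, 200000.0, 12.5))
dao.flush()
corners = [Point(90000.0, 190000.0, 0), Point(110000.0, 210000.0, 0)]
found = dao.find_within_box(corners)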
feed = feedparser.parse(feed_url)
for entry in feed['entries']:
    n_total += 1
    if conn.find_one({'url': entry['link']}):
        logging.debug('Skip: %s' % entry['link'])
        n_skipped += 1
        continue
    n_downloaded += 1
    article = newspaper.Article(entry['link'])
    article.download()
    article.parse()
    conn.insert({
        'url': entry['link'],
        'link': article.canonical_link,
        'title': article.title,
        'text': article.text,
        'date': str(article.publish_date),
        # article.images is a set, which BSON can't encode; store a list
        'images': list(article.images),
        'source': source
    })
    n_inserted += 1
    logging.debug('Insert: %s' % entry['link'])

response = "%d total articles\n%d downloaded\n%d inserted\n%d skipped" % (
    n_total, n_downloaded, n_inserted, n_skipped)
print response
logging.info(response)
time.sleep(3600)
class AutoCompModule:
    # Auto completion module using the MongoDB server.
    # Holds three collections:
    #   dict    - the number of times x appears in the learned text
    #   dictBy2 - the number of times the pair (x, y) appears in the learned text
    #   dictBy3 - the number of times the triple (x, y, z) appears in the learned text
    def __init__(self, DBName):
        self.dict = Connection()[DBName]['dict']
        self.dictBy2 = Connection()[DBName]['dictBy2']
        self.dictBy3 = Connection()[DBName]['dictBy3']

    # Learn from a single file.
    # pprev, prev, word are the three last seen words (word is the current word).
    def learnSingle(self, fileName):
        input = open(fileName, encoding='utf-8')
        for line in input:
            pprev = prev = None
            for word in line.split():
                if re.match("[.,\"\(\);']", word):
                    pprev = prev = word = None
                    continue
                if self.dict.find_one({"word": word, "grade": {"$exists": True}}) != None:
                    self.dict.update({"word": word}, {"$inc": {"grade": 1}})
                else:
                    self.dict.insert({"word": word, "grade": 1, "info": None})
                if prev != None:
                    if self.dictBy2.find_one({"first": prev, "second": word, "grade": {"$exists": True}}) != None:
                        self.dictBy2.update({"first": prev, "second": word}, {"$inc": {"grade": 1}})
                    else:
                        self.dictBy2.insert({"first": prev, "second": word, "grade": 1})
                if pprev != None:
                    if self.dictBy3.find_one({"first": pprev, "second": prev, "third": word, "grade": {"$exists": True}}) != None:
                        self.dictBy3.update({"first": pprev, "second": prev, "third": word}, {"$inc": {"grade": 1}})
                    else:
                        self.dictBy3.insert({"first": pprev, "second": prev, "third": word, "grade": 1})
                pprev = prev
                prev = word
        input.close()

    # Learn from multiple files, using learnSingle.
    def learn(self, inputDir):
        size = len(os.listdir(inputDir))
        i = 1
        if os.path.isdir(inputDir):
            for f in sorted(os.listdir(inputDir)):
                self.learnSingle(inputDir + '/' + f)
                sys.stdout.flush()
                print(str(int((i * 100) / size)) + "%", end="\r")
                i += 1
            print("SUCCESS LEARNING FINISH")
        else:
            print("ERROR!!")

    def addMalletInfoToDB(self, wtcfile, twwfile, keysfile):
        wordDict = malletGetWordsAndData(wtcfile, twwfile, keysfile)
        for word in wordDict:
            if self.dict.find_one({"word": word, "grade": {"$exists": True}}) != None:
                self.dict.update({"word": word}, {"$set": {"info": wordDict[word]}})

    # Suggest the next word.
    # For a given pprev and prev it finds the most likely word, once using
    # only prev and once using both pprev and prev.
    # None values are returned when there is no match for prev (or for pprev
    # and prev) in the dictionaries, or when they are given as None.
    def suggest(self, pprev=None, prev=None):
        if prev is None:
            return None, None
        if pprev is None:
            a = self.dictBy2.find_one({"first": prev}, sort=[("grade", -1), ("second", 1)])
            if a is not None:
                return a["second"], None
            else:
                return None, None
        a = self.dictBy2.find_one({"first": prev}, sort=[("grade", -1), ("second", 1)])
        b = self.dictBy3.find_one({"first": pprev, "second": prev}, sort=[("grade", -1), ("third", 1)])
        if b is not None:
            return a["second"], b["third"]
        else:
            return None, None

    def suggest2(self, pprev=None, prev=None, x=5):
        if prev is None:
            return None, None
        i = 0
        lst = []
        for a in self.dictBy2.find({"first": prev}).sort([('grade', -1), ('second', 1)]):
            if i < x:
                lst.append(a)
                i += 1
            else:
                break
        if lst == []:
            return None, None
        res1 = [[a["grade"], a["second"]] for a in lst]
        if pprev is None:
            return res1, None
        i = 0
        lstBy3 = []
        for a in self.dictBy3.find({"first": pprev, "second": prev}).sort([('grade', -1), ('second', 1)]):
            if i < x:
                lstBy3.append(a)
                i += 1
            else:
                break
        if not lstBy3:  # 'is []' is always False; an empty-list check was intended
            return res1, None
        return res1, [[a["grade"], a["third"]] for a in lstBy3]
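# Sketch of training and querying AutoCompModule; the 'autocomp' database
# name, the 'corpus' directory, and the example words are placeholders, not
# from the original source.
module = AutoCompModule('autocomp')
module.learn('corpus')
best, best_with_context = module.suggest(pprev='the', prev='quick')
top5, top5_with_context = module.suggest2(pprev='the', prev='quick', x=5)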
class AutoCompModule:

    def __init__(self, DBName):
        self.dict = Connection()[DBName]['dict']
        self.dictBy2 = Connection()[DBName]['dictBy2']
        self.dictBy3 = Connection()[DBName]['dictBy3']

    def learnSingle(self, fileName):
        input = open(fileName, encoding='utf-8')
        for line in input:
            pprev = prev = None
            for word in line.split():
                if re.match("[.,\"\(\);']", word):
                    pprev = prev = word = None
                    continue
                if self.dict.find_one({"word": word, "amount": {"$exists": True}}) != None:
                    self.dict.update({"word": word}, {"$inc": {"amount": 1}})
                else:
                    self.dict.insert({"word": word, "amount": 1})
                if prev != None:
                    if self.dictBy2.find_one({"first": prev, "second": word, "grade": {"$exists": True}}) != None:
                        self.dictBy2.update({"first": prev, "second": word}, {"$inc": {"grade": 1}})
                    else:
                        self.dictBy2.insert({"first": prev, "second": word, "grade": 1})
                if pprev != None:
                    if self.dictBy3.find_one({"first": pprev, "second": prev, "third": word, "grade": {"$exists": True}}) != None:
                        self.dictBy3.update({"first": pprev, "second": prev, "third": word}, {"$inc": {"grade": 1}})
                    else:
                        self.dictBy3.insert({"first": pprev, "second": prev, "third": word, "grade": 1})
                pprev = prev
                prev = word
        # Normalize counts into conditional probabilities
        for entity in self.dictBy3.find():
            amount = self.dictBy2.find_one({"first": entity["first"], "second": entity["second"]})["grade"]
            self.dictBy3.update({"first": entity["first"], "second": entity["second"], "third": entity["third"]},
                                {"$set": {"grade": entity["grade"] / amount}})
        for entity in self.dictBy2.find():
            amount = self.dict.find_one({"word": entity["first"]})["amount"]
            self.dictBy2.update({"first": entity["first"], "second": entity["second"]},
                                {"$set": {"grade": entity["grade"] / amount}})
        input.close()

    def learn(self, inputDir):
        if os.path.isdir(inputDir):
            for f in sorted(os.listdir(inputDir)):
                self.learnSingle(inputDir + '/' + f)
            print("SUCCESS LEARNING")
        else:
            print("ERROR!!")

    def suggest(self, pprev=None, prev=None):
        if prev is None:
            return None, None
        if pprev is None:
            return self.dictBy2.find_one({"first": prev}, sort=[("grade", -1), ("second", 1)])["second"], None
        return (self.dictBy2.find_one({"first": prev}, sort=[("grade", -1), ("second", 1)])["second"],
                self.dictBy3.find_one({"first": pprev, "second": prev}, sort=[("grade", -1), ("third", 1)])["third"])

    def simpleTest(self, testFile, num):
        test = open(testFile, 'r', encoding='utf-8')
        numOfChecks1 = numOfChecks2 = succ1 = succ2 = 0
        i = num
        for line in test:
            pprev = prev = None
            for word in line.split():
                if re.match("[.,\"\(\);']", word):
                    pprev = prev = word = None
                    i = num
                    continue
                if i != 0:
                    i -= 1
                    pprev = prev
                    prev = word
                else:
                    a, b = self.suggest(pprev, prev)
                    # Compare by value; 'is' only checks object identity
                    if a is not None:
                        if a == word:
                            succ1 += 1
                        numOfChecks1 += 1
                    if b is not None:
                        if b == word:
                            succ2 += 1
                        numOfChecks2 += 1
                    i = num
                    pprev = prev
                    prev = word
        test.close()
        return succ1 / numOfChecks1, succ2 / numOfChecks2
"Other_issue": 0 } else: project_item['past_month'] = repo_dict[href]['past_month'] project_item['past_week'] = repo_dict[href]['past_week'] project_item['yesterday'] = repo_dict[href]['yesterday'] cnt += 1 cur_item['children'].append(project_item) next_a = next_a.find_next('a') else: if len(cur_item['children']) > 0: # document.append(cur_item) curation_collection.insert(cur_item) cur_category = next_category cur_item = {"category": cur_category.contents[0], "children": []} next_category = next_category.find_next('h2') # print next_category.contents[0] # print cnt # print '-------------------------------------' # cnt = 0 print cnt # print next_category # print next_a # f = open('changed_awesome_go.html', 'w') # f.write(str(soup)) # f.close()
#!/usr/bin/env python
# coding:utf-8
import threading, time
from pymongo import Connection

lock = threading.RLock()
cond = threading.Condition()
table = Connection().test.tablex
table.drop()
table.insert({'x': 0})


def target():
    cond.acquire()
    cond.wait()
    lock.acquire()
    i = table.find_one({'x': 0})
    if i:  # found it
        table.update({'_id': i['_id']}, {'$set': {'x': 1}})
    lock.release()
    cond.release()
    if i:
        print '\n%s got i' % threading.current_thread()


for i in range(20):
    t = threading.Thread(target=target)
    t.start()
time.sleep(1)
# Wake the waiting threads; without a notify the workers block forever
cond.acquire()
cond.notify_all()
cond.release()
for line in cur.fetchall():
    actor, repo_name, event_type = line
    # rsplit returns a list; take the part before the '/' as the owner
    owner = repo_name.rsplit('/', 1)[0]
    # if not repo_dict[repo_name].get("yesterday"):
    #     repo_dict[repo_name]['yesterday'] = {"Owner_push": 0, "Owner_issue": 0, "Owner_pullrequest": 0, "Other_star": 0, "Other_pullrequest": 0, "Other_issue": 0}
    key_prefix = "Owner_" if owner == actor else "Other_"
    cur_item = repo_dict[repo_name]['yesterday']
    if event_type == 'PushEvent':
        cur_item["Owner_push"] += 1
    if event_type in ['IssuesEvent', 'IssueCommentEvent']:
        cur_item[key_prefix + 'issue'] += 1
    if event_type in ['PullRequestEvent', 'PullRequestReviewCommentEvent']:
        cur_item[key_prefix + 'pullrequest'] += 1
    if event_type == 'WatchEvent':
        cur_item["Other_star"] += 1

# Save to mongodb
client = MongoClient('localhost', 27017)
curation_collection = Connection()['curation']['curation_profile']
for k, v in repo_dict.iteritems():
    v['url'] = k
    curation_collection.insert(v)
del curation_collection
class Mongo(object):

    def __init__(self, log, sw=None):
        self.name = self.__class__.__name__
        self.log = log
        self.articles = Connection().aivb_db.articles \
            if not sw else Connection().aivb_redux.dater

    def __str__(self):
        return """
                'all': None,
                'search': {k: v},
                'empty': {k: 0},
                'filled': {k: {'$gt': 0.5}},
                'gtv': {k: {'$gt': v}},
                'regex': {k: {'$regex': v}},
                'exists': {k: {'$exists': True}},
                'and_ex': {'$and': [{k: v}, {k2: {'$exists': True}}]},
                'grt_ex': {'$and': [{k: {'$exists': True}}, {k2: {'$gt': v2}}]},
                'grt_eq': {'$and': [{k: {'$exists': True}}, {k2: v2}]},
                'p_range': {'$and': [{k: {'$gte': v}}, {k2: {'$lte': v2}}]},
                'period': {'$and': [{k: v}, {k2: {'$gt': v2}}]},
                'andand': {'$and': [{k: v}, {k2: v2}]}
               """

    def load(self, n=None):
        load = Loader(self.log)
        data = load.fetch_data(n)
        for x in data:
            for i in x:
                self.articles.insert(i)
        self.log.mlog.info("Inserted %d Instances of articles." % n)

    def search(self, command, key=None, value=None,
               s_key=None, s_value=None, t_key=None):
        if not key:
            res = [self.articles.find_one()]
        else:
            res = self.parse_search(command, key, value, s_key, s_value, t_key)
        return res

    def clear_all(self, v=None):
        for art in self.articles.find():
            if v:
                print art
            self.articles.remove(art)

    def parse_search(self, c, k, v, k2, v2, k3):
        op = {
            'all': None,
            'search': {k: v},
            'empty': {k: 0},
            'filled': {k: {'$gt': 0.5}},
            'gtv': {k: {'$gt': v}},
            'regex': {k: {'$regex': v}},
            'exists': {k: {'$exists': True}},
            'and_ex': {'$and': [{k: v}, {k2: {'$exists': True}}]},
            'grt_ex': {'$and': [{k: {'$exists': True}}, {k2: {'$gt': v2}}]},
            'grt_eq': {'$and': [{k: {'$exists': True}}, {k2: v2}]},
            'p_range': {'$and': [{k: {'$gte': v}}, {k2: {'$lte': v2}}]},
            'period': {'$and': [{k: v}, {k2: {'$gt': v2}}]},
            'andand': {'$and': [{k: v}, {k2: v2}]}
        }
        if 'select' not in c:
            return self.articles.find(op[c])
        else:
            if not k3:
                return self.articles.find(op[c.split('_')[1]],
                                          {'_id': k2, v2: 1})
            else:
                return self.articles.find(op[c.split('_')[1]],
                                          {'_id': k2, v2: 1, k3: 1})

    def update(self, c, eid, k, v, k2=None):
        op = {'one': {'$set': {k: v}},
              'two': {'$set': {k2: {'$set': {k: v}}}}}
        self.articles.update({'_id': eid}, op[c], upsert=False, multi=False)
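# Hedged query example for the Mongo wrapper. The stub logger and the
# 'subject_count' field are assumptions, not from the original source; the
# class only requires that log expose an .mlog object with .info().
class StubLog(object):
    class mlog(object):
        @staticmethod
        def info(msg):
            print msg

store = Mongo(StubLog())
for art in store.search('gtv', key='subject_count', value=0):
    print art['_id']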