Example #1
0
class MongoDB():
    """Thin convenience wrapper around a single pymongo database handle."""

    def __init__(self, db_uri, db_name, need_auth=False, auth=()):
        """Open ``db_name`` at ``db_uri``.

        When ``need_auth`` is true, ``auth`` is unpacked into
        ``authenticate`` on the database handle.
        """
        self.db = MongoClient(db_uri)[db_name]
        if need_auth:
            self.db.authenticate(*auth)

    def insert_one(self, tb_name, data, encrypt_word):
        """Insert ``data`` into ``tb_name`` with an ``_id`` derived from ``encrypt_word``.

        The ``_id`` is the first 16 hex characters of the MD5 digest of
        ``encrypt_word``; note that ``data`` is modified in place.

        Returns True on success, False when the insert failed with a
        duplicate-key error, and None (after printing) on any other failure.
        """
        try:
            data['_id'] = hashlib.md5(encrypt_word.encode()).hexdigest()[:16]
            self.db[tb_name].insert_one(data)
            return True
        except Exception as e:
            # Match on the common prefix only: the text after it differs
            # between server versions ("... error index" / "... error collection").
            if 'duplicate key error' in str(e):
                return False
            print('other mistake -->', e)
            return None

    def delete_one(self, tb_name, key_value_dict):
        """Delete one document matching ``key_value_dict``.

        Returns True when the call completed and False when it raised.
        """
        try:
            self.db[tb_name].delete_one(key_value_dict)
            return True
        except Exception:
            return False

    def get_one_and_pop_one(self, tb_name):
        """Fetch one document from ``tb_name`` and delete it.

        Returns the document, or None when the collection has no document
        or the delete failed.
        """
        res = self.db[tb_name].find_one()
        if res is None:
            # Nothing to pop; previously this raised TypeError on res['_id'].
            return None
        if self.delete_one(tb_name, {'_id': res['_id']}):
            return res
        return None
Example #2
0
class TwseDailyDataset(object):
    """Read access to the per-day collections of the ``twse_daily`` database."""

    def __init__(self, mongo_url='localhost'):
        """Connect to ``mongo_url``, authenticate and bind the daily collections."""
        self.db = MongoClient(host=mongo_url)['twse_daily']
        self.db.authenticate(name='crawler', password='******')
        names = ['BFI82U', 'FMTQIK', 'MI_INDEX', 'MI_MARGN', 'STOCK_DAY']
        self.collections = [self.db[name] for name in names]

    def date_to_datetime(self, obj_or_year, month=None, day=None):
        """Convert a date to a datetime at 14:00 Taiwan time (market close).

        Accepts either a date-like object (``month`` left as None) or explicit
        ``year, month, day`` values. Per the original note, pymongo converts
        datetime objects to the UTC timezone when writing them.
        """
        if month is not None:
            year = obj_or_year
        else:
            year = obj_or_year.year
            month = obj_or_year.month
            day = obj_or_year.day
        naive = datetime(year=year, month=month, day=day, hour=14)
        taipei = pytz.timezone('Asia/Taipei')
        return taipei.localize(naive)

    def __getitem__(self, day):
        """Return the merged documents of all collections for ``day``.

        Later collections overwrite keys of earlier ones; an empty dict is
        returned when no collection has a document for that day.
        """
        merged = {}
        for collection in self.collections:
            document = collection.find_one({'_id': self.date_to_datetime(day)})
            if not document:
                continue
            merged.update(document)
        return merged

    def gen_features(self, feature_names, only_trading_day=True):
        """Build a (dates, features) pair covering 2004-04-01 up to the last day.

        The last day is today when the local hour is past 15, otherwise
        yesterday. ``features`` is a float32 array with one row per day and
        one column per entry of ``feature_names``; missing values stay 0.
        When ``only_trading_day`` is true, days for which no collection
        returned a document are dropped from both arrays.
        """
        first_day = date(2004, 4, 1)
        if datetime.now().hour > 15:
            last_day = date.today()
        else:
            last_day = date.today() - timedelta(days=1)
        span = (last_day - first_day).days
        dates = [first_day + timedelta(days=offset) for offset in range(span + 1)]

        masks = np.zeros(len(dates), dtype=bool)
        features = np.zeros((len(dates), len(feature_names)), dtype=np.float32)
        # Projection: only the requested fields, without the _id key.
        projection = dict.fromkeys(feature_names, 1)
        projection['_id'] = 0

        for row, current in enumerate(dates):
            merged = {}
            for collection in self.collections:
                document = collection.find_one(
                    {'_id': self.date_to_datetime(current)}, projection)
                if document:
                    merged.update(document)
            if merged:
                masks[row] = True
            for column, name in enumerate(feature_names):
                if name in merged:
                    features[row, column] = merged[name]

        dates = np.array(dates)
        if not only_trading_day:
            return dates, features
        return dates[masks], features[masks]
Example #3
0
def get_db():
    """Return the authenticated ``crawler`` database on 10.82.244.18:27018."""
    # An alternative host/credential pair used to be kept here as
    # commented-out code.
    client = MongoClient('10.82.244.18', 27018)
    crawler = client.crawler
    crawler.authenticate('spider', 'spider')
    return crawler
    def getOplog(self):
        """Publish the oplog time window and the oplog sync lag as metrics.

        ``mongodb.oplog`` is the difference between the ``ts`` time of the
        newest and oldest ``local.oplog.rs`` entries; ``mongodb.oplog-sync``
        is the current UTC epoch time minus the newest entry's ``ts`` time.
        Nothing is published when the oplog collection is empty.
        """
        db = MongoClient(self.mongo_host, self.mongo_port)

        if self.mongo_user and self.mongo_password:
            # NOTE(review): authenticate() is invoked on the client object, as
            # in the original -- confirm the installed pymongo supports this.
            db.authenticate(self.mongo_user, self.mongo_password)

        coll = db.local['oplog.rs']

        # Natural order ascending/descending yields the oldest/newest entry.
        oldest = next(coll.find().sort('$natural', 1).limit(1), None)
        newest = next(coll.find().sort('$natural', -1).limit(1), None)
        if oldest is None or newest is None:
            # Empty oplog: previously this fell through to an unbound local.
            return

        first_time = oldest['ts'].time
        # Read the seconds directly instead of parsing str(Timestamp(...)).
        last_time = newest['ts'].time

        status = round(float(last_time - first_time), 1)
        self.addMetrics('mongodb.oplog', status)

        currentTime = timegm(gmtime())
        self.addMetrics('mongodb.oplog-sync', (currentTime - last_time))
class MongoDBHelper:
    """Connection helper configured from a dict-like ``config`` object."""

    def __init__(self, config):
        """Read the connection settings from ``config`` and connect at once.

        Keys read: ``db_address``, ``db_address_uri``, ``db_port`` (converted
        to int), ``db_user``, ``db_pwd`` and ``db_name``.
        """
        self.response_code = None
        self.response_data = {}
        self.config = config

        self.db_address = self.config["db_address"]
        self.db_address_uri = self.config["db_address_uri"]
        self.db_port = int(self.config["db_port"])
        self.db_user = self.config["db_user"]
        self.db_pwd = self.config["db_pwd"]
        self.db_name = self.config["db_name"]
        self.connection = None

        self.authenticate_mongo_connection()

    def mongo_connection(self):
        """Store an unauthenticated client for ``db_address``/``db_port``.

        Raises Exception with a descriptive message on failure.
        """
        try:
            self.connection = MongoClient(self.db_address, self.db_port)
            logging.info("MongoDB Connection is Done !!!")

        except Exception as e:
            logging.error("MongoDB Connection Error: {}".format(str(e)))
            raise Exception("MongoDB Connection Error: {}".format(str(e)))

    def authenticate_mongo_connection(self):
        """Store the ``db_name`` database of ``db_address_uri`` and authenticate.

        Raises Exception with a descriptive message on failure.
        """
        try:
            connection = MongoClient(self.db_address_uri)
            self.connection = connection[self.db_name]
            self.connection.authenticate(self.db_user, self.db_pwd)
            logging.info("MongoDB Connection is Done !!!")

        except Exception as e:
            logging.error("MongoDB Auth Connection Error: {}".format(str(e)))
            raise Exception("MongoDB Auth Connection Error: {}".format(str(e)))

    def get_connection(self):
        """Return whatever handle the last connect call stored."""
        return self.connection

    def run_query(self, query):
        """Run ``query[0]`` as a filter and collect JSON strings in ``result_list``.

        The ``_id`` field is excluded from the results.
        NOTE(review): ``find`` is called on ``self.connection`` directly, so
        this presumably expects a collection there -- verify against callers.
        """
        try:
            self.result_list = []
            self.result = self.connection.find(query[0], {'_id': False})
            for i in self.result:
                self.result_list.append(json.dumps(i))
            logging.info("MongoDB Run Filter Query is Done")
        except Exception as e:
            logging.error("MongoDB Run Filter Query Error: {}".format(str(e)))
            raise Exception("MongoDB run Filter Query Error: {}".format(
                str(e)))

    def insert_to_mongo(self, collection, data):
        """Insert ``data`` into the collection named ``collection``.

        Raises Exception with a descriptive message on failure.
        """
        try:
            collection = self.connection[str(collection)]
            collection.insert(data)
            logging.info("Insert Done")
        # Was ``except exception``, an undefined name that turned every
        # insert failure into a NameError.
        except Exception as e:
            logging.error("Insert to MongoDB Error: {}".format(str(e)))
            raise Exception("Insert to MongoDB Error: {}".format(str(e)))
Example #6
0
 def _get_db_connection(self):
     """Return the database named by the path of ``MONGODB_URI``.

     When the URI carries both a user name and a password, the database
     handle is authenticated with them before it is returned.
     """
     parsed = urlsplit(MONGODB_URI)
     db_name = parsed.path[1:]
     db = MongoClient(MONGODB_URI)[db_name]
     # Use urlsplit's own userinfo parsing: splitting the netloc by hand
     # broke on passwords containing ':' or '@'.
     user, password = parsed.username, parsed.password
     if user is not None and password is not None:
         db.authenticate(user, password)
     return db
Example #7
0
class InnerMongo:
    """Holds an authenticated handle to the ``crawler`` database (internal host)."""

    def __init__(self):
        # Stored under a private name: the ``db`` property below has no
        # setter, so assigning ``self.db`` raises AttributeError on Python 3.
        self._db = MongoClient('10.82.244.18', 27018).crawler
        self._db.authenticate('spider', 'spider')

    @property
    def db(self):
        """The authenticated ``crawler`` database handle."""
        # Returning ``self.db`` here would recurse forever.
        return self._db
Example #8
0
def RouteMongoConn():
    """Return the ``session`` collection of the authenticated ``store`` database."""
    client = MongoClient("mongodb://*****:*****@10.10.233.135")
    store = client["store"]
    store.authenticate("writer", "miaoji1109")
    return store.get_collection("session")
Example #9
0
class OuterMongo:
    """Holds an authenticated handle to the ``crawler`` database (external host)."""

    def __init__(self):
        # Stored under a private name: the ``db`` property below has no
        # setter, so assigning ``self.db`` raises AttributeError on Python 3.
        self._db = MongoClient('140.143.94.171', 27017).crawler
        self._db.authenticate('mongodbcrawler', 'Shantianci56')

    @property
    def db(self):
        """The authenticated ``crawler`` database handle."""
        # Returning ``self.db`` here would recurse forever.
        return self._db
class MongoProcessor(BaseProcessor):
    """Processor that writes queued payloads into MongoDB collections.

    Each payload is inserted once per matching rule, into the collection
    named by that rule's ``tag``.
    """

    def __init__(self, _upstream, _enviroinment):
        BaseProcessor.__init__(self, _upstream, _enviroinment)
        self._collection = None

    def run(self):
        """Start the background process."""
        self.run_process.start()

    def _run(self):
        self.logr.debug("Mongo Processor Started")
        self.run_loop()

    def run_loop(self):
        """Consume messages until the stop flag is set."""
        while not self._stopped.is_set():
            payload = self.next_message()
            if payload is not None:
                payload['random'] = random.random()
                payload['after_gnip'] = True
                self.put_in_mongo(payload)
        self.logr.debug("Exiting Mongo run loop")

    def put_in_mongo(self, obj):
        """Insert ``obj`` into one collection per entry of its matching rules."""
        self.logr.debug("Putting in Mongo: " + str(obj))
        for rule in obj['gnip']['matching_rules']:
            self.client()[rule['tag']].insert(obj)

    def client(self):
        """Return the authenticated database handle, creating it on first use."""
        # ``_client`` is not assigned in __init__ of this class, so fall back
        # to None instead of raising AttributeError on the first call.
        if getattr(self, '_client', None) is None:
            host = self.environment.mongo_host
            port = int(self.environment.mongo_port)
            db = self.environment.mongo_db
            self._client = MongoClient(host=host, port=port)[db]
            self._client.authenticate(self.environment.mongo_username,
                                      self.environment.mongo_password)
        return self._client

    def stop(self):
        """Request the run loop to exit."""
        self._stopped.set()

    def stopped(self):
        """True once stop was requested and the queue reports size 0."""
        return self._stopped.is_set() and self.queue.qsize() == 0

    def running(self):
        """True while the process is alive and not yet stopped."""
        # The original evaluated this expression but never returned it.
        return self.run_process.is_alive() and not self.stopped()

    def next_message(self):
        """Return the next queued message, or None when none is available."""
        ret_val = None
        if self.queue.qsize() > 0:
            try:
                ret_val = self.queue.get(block=False)
            except multiprocessing.queues.Empty:
                self.logr.error("Queue was empty when trying to get next message")
        return ret_val
Example #11
0
class Game3DmPipeline(object):
    """Pipeline that stores every processed item in the ``test.game`` collection."""

    def __init__(self):
        client = MongoClient('master')
        self.db = client.test
        self.db.authenticate('jmw', 'jmw123')
        self.collection = self.db.game

    def process_item(self, item, spider):
        """Insert a dict copy of ``item`` and hand the item back unchanged."""
        record = dict(item)
        self.collection.insert(record)
        return item
Example #12
0
def clear_db(param):
    """Drop the ``battles`` collection and reset the counters document.

    ``param`` is not used by this function. Authentication happens only
    when both ``username`` and ``password`` are set in ``db_config``.
    """
    db = MongoClient(host=db_config['host'],
                     port=db_config['port'])[db_config['db_name']]
    if db_config['username'] is not None and db_config['password'] is not None:
        # Authenticate with the same key that was just checked; the original
        # read db_config['name'] here.
        db.authenticate(db_config['username'], db_config['password'])

    db.battles.drop()
    db.counter.update({"_id": 0}, {"$set": {"battles": 0, "users": 0}})

    print("SUCC")
Example #13
0
class Game3DmPipeline(object):
    """Pipeline that stores every processed item in the ``game_3dm.game`` collection."""

    def __init__(self):
        self.db = MongoClient('192.168.254.131').game_3dm
        self.db.authenticate('gameUser', '123456')
        self.collection = self.db.game

    def process_item(self, item, spider):
        """Insert a dict copy of ``item`` and return the item unchanged."""
        # The collection is stored as ``self.collection``; ``self.game`` was
        # never defined and raised AttributeError on every item.
        self.collection.insert(dict(item))
        return item
Example #14
0
def get_db(host=database.host,
           user=database.user,
           pwd=database.pwd,
           dbname=database.dbname,
           rs=database.rs):
    """Return the ``dbname`` database, authenticated unless ``user`` is empty."""
    client = MongoClient(host=host, replicaset=rs)
    handle = client[dbname]
    if user == '':
        return handle
    handle.authenticate(user, pwd)
    return handle
Example #15
0
def mongo_start():
    """Parse every file listed in the index file and bulk-load the results.

    ``sys.argv[2]`` names a whitespace-separated list of file paths. Each
    file is parsed into sentences, each sentence is analysed by
    ``check_find`` and one result document per sentence is queued on an
    unordered bulk operation against ``test.syntax3``. The batch is flushed
    after every 100 files and once more at the end.
    """
    with open(sys.argv[2], "r") as fin:
        index = fin.read().split()

    cnt = 0
    db = MongoClient("127.0.0.1", 27017).test
    db.authenticate("test", "test")
    cl = db.syntax3
    bulk = cl.initialize_unordered_bulk_op()
    # Operations queued since the last flush; an empty batch must not be
    # executed.
    pending = 0

    for path in index:
        with open(path, "r") as fin1:
            text1 = fin1.read()
        # Single-argument call form prints the same on Python 2 and 3.
        print(path)
        sentences = parse_text_file(path, text1)

        for s in sentences:
            tree1 = make_whole_tree(s[2], s[1])
            tk = s[1]
            tk.append(('', ''))
            leaf = [node for node in tree1 if len(node['c']) == 0]

            retJson = {'r1': [], 'r2': [], 'r3': []}
            arg = {
                'tk': tk,
                'tree': tree1,
                'nodes': leaf,
                'strlist1': retJson['r1'],
                'strlist2': retJson['r2'],
                'strlist3': retJson['r3'],
            }
            check_find(arg)
            bulk.insert({
                'sent': s[0],
                'res1': retJson['r1'],
                'res2': retJson['r2'],
                'res3': retJson['r3']
            })
            pending += 1

        cnt += 1
        if cnt % 100 == 0 and pending:
            result = bulk.execute()
            pprint(result)
            bulk = cl.initialize_unordered_bulk_op()
            pending = 0

    if pending:
        result = bulk.execute()
        pprint(result)

    db.logout()
Example #16
0
class MongodbClient(object):
    """Helper exposing common collection operations on one database."""

    def __init__(self, host=MONGODB_HOST, port=MONGODB_PORT):
        client = MongoClient(host=host, port=port)
        self.db = client[MONGODB_DATABASE]
        self.db.authenticate(MONGODB_USER, MONGODB_PASSWORD)

    def mongodb_insert(self, table, parameter):
        """
        :param table: collection name
        :param parameter: data to insert into MongoDB (a dict)
        :return: ``str()`` of the value returned by the insert call
        """
        inserted = self.db[table].insert(parameter)
        return str(inserted)

    def mongodb_find_one(self, table, parameter={}):
        """
            Look up a single document in ``table`` matching the dict
            ``parameter`` and return the result of ``find_one``.
        """
        collection = self.db[table]
        return collection.find_one(parameter)

    def mongodb_find(self, table, parameter, projection={}):
        """
            Look up all documents in ``table`` matching the dict
            ``parameter`` and return the result of ``find``.
        """
        collection = self.db[table]
        return collection.find(parameter, projection)

    def mongodb_remove(self, table, parameter):
        """
            Remove the documents in ``table`` that match the dict ``parameter``.
        """
        collection = self.db[table]
        collection.remove(parameter)

    def mongodb_find_limit(self, table='', param={}, projection={}, limit=0):
        """Like ``mongodb_find`` with ``limit`` applied to the result."""
        matches = self.db[table].find(param, projection)
        return matches.limit(limit)

    def mongodb_count(self, table='', param={}):
        """Return ``count()`` of the documents in ``table`` matching ``param``."""
        matches = self.db[table].find(param)
        return matches.count()

    def mongodb_update(self, table, condition_parameter, result_parameter):
        """
            Update every document in ``table`` matching the dict
            ``condition_parameter`` with the fields in ``result_parameter``;
            ``updateTime`` is set to the current date as well.
        """
        changes = {
            "$set": result_parameter,
            "$currentDate": {"updateTime": True},
        }
        self.db[table].update_many(condition_parameter, changes)
Example #17
0
def connect():
    """Return the authenticated database described by the ``mongo`` config section.

    The section's ``user``, ``port``, ``url``, ``pass`` and ``db`` entries are
    combined into a ``mongodb://`` URI.
    """
    mongo_settings = dict(inexcess.read_config('mongo'))
    user, port, host, password, db = (
        mongo_settings[key] for key in ('user', 'port', 'url', 'pass', 'db')
    )
    uri = ''.join(
        ('mongodb://', user, ':', password, '@', host, ':', port, '/', db))

    client = MongoClient(uri)
    database = client[db]
    database.authenticate(user, password)
    return database
Example #18
0
class Database(object):

    '''
    Represent a MongoDB database, backed by pymongo's MongoClient
    '''

    def __init__(self, **kwargs):
        '''
        Initialise from keyword arguments.

        With ``uri``: connect to it and use ``db`` if given, otherwise the
        URI's default database. Without ``uri``: connect to ``host``/``port``
        (defaults ``localhost``/27017) and use ``db`` (default ``test``).
        '''
        if 'uri' in kwargs:
            self._client = MongoClient(kwargs['uri'])
            if 'db' in kwargs:
                self._db = self._client[kwargs['db']]
            else:
                self._db = self._client.get_default_database()
        else:
            self._client = MongoClient(
                kwargs.get('host', 'localhost'),
                kwargs.get('port', 27017)
            )
            self._db = self._client[kwargs.get('db', 'test')]
        # Document base class bound to this database.
        self.Document = type('Document', (BaseDocument,), {'_db': self._db})

    def authenticate(self, *args, **kwargs):
        '''
        Authenticate against the wrapped database; arguments are forwarded
        unchanged and the underlying call's result is returned.
        '''
        # Authenticate on the database handle, as every other snippet using
        # this API does; the original called it on the client object.
        return self._db.authenticate(*args, **kwargs)

    def drop(self):
        '''Drop the wrapped database.'''
        self._client.drop_database(self._db.name)

    def drop_collection(self, collection):
        '''
        Drop a collection given by name or by BaseDocument subclass.

        Raises TypeError for any other argument.
        '''
        if isinstance(collection, str):
            self._db.drop_collection(collection)
        elif issubclass(collection, BaseDocument):
            self._db.drop_collection(collection.__collection__)
        else:
            raise TypeError

    def get_collections(self, include_system_collections=False):
        '''Return the collection names of the wrapped database.'''
        return self._db.collection_names(include_system_collections)

    @property
    def host(self):
        return self._client.host

    @property
    def port(self):
        return self._client.port

    @property
    def name(self):
        return self._db.name
Example #19
0
# NOTE(review): the first statement of this function appears to be truncated
# in this copy of the source: the default URI string runs straight into
# "in url:", and the lines that would define `parsed` and `db` are missing
# (presumably lost when credentials were masked). As written it does not
# parse; restore the missing lines from the original before using it.
def get_db():
    url = os.getenv('MONGO_URI', 'mongodb://*****:*****@' in url:
        user, password = parsed.netloc.split('@')[0].split(':')
        db.authenticate(user, password)

    return db
Example #20
0
def mongo_auth():
    """Return a database handle derived from ``MONGO_URI``.

    For host ``localhost`` the URI is passed to the client unchanged and its
    ``db`` database is returned. Otherwise host, port, database name, user
    and password are cut out of the URI by hand and the database is
    authenticated before being returned.
    """
    remainder = MONGO_URI.split('//')[-1]
    at_parts = remainder.split('@')
    location = at_parts[-1]
    host = location.split(':')[0]
    if host == 'localhost':
        return MongoClient(MONGO_URI).db
    port = int(location.split(':')[-1].split('/')[0])
    base = location.split('/')[-1]
    credentials = at_parts[0].split(':')
    user = credentials[0]
    passw = credentials[1]
    db = MongoClient(host, port, retryWrites=False)[base]
    db.authenticate(user, passw)
    return db
Example #21
0
def mongo_start():
    """Parse every file listed in the index file and bulk-load the results.

    ``sys.argv[2]`` names a whitespace-separated list of file paths. Each
    file is parsed into sentences, each sentence is analysed by
    ``check_find`` and one result document per sentence is queued on an
    unordered bulk operation against ``test.syntax3``. The batch is flushed
    after every 100 files and once more at the end.
    """
    with open(sys.argv[2], "r") as index_file:
        index = index_file.read().split()

    cnt = 0
    db = MongoClient("127.0.0.1", 27017).test
    db.authenticate("test", "test")
    cl = db.syntax3
    bulk = cl.initialize_unordered_bulk_op()
    # Number of operations queued since the last flush; an empty batch must
    # not be executed.
    queued = 0

    for file_name in index:
        with open(file_name, "r") as source:
            text1 = source.read()
        # Single-argument call form prints the same on Python 2 and 3.
        print(file_name)
        sentences = parse_text_file(file_name, text1)

        for s in sentences:
            tree1 = make_whole_tree(s[2], s[1])
            tk = s[1]
            tk.append(('', ''))
            leaf = [node for node in tree1 if len(node['c']) == 0]

            retJson = {'r1': [], 'r2': [], 'r3': []}
            arg = {
                'tk': tk,
                'tree': tree1,
                'nodes': leaf,
                'strlist1': retJson['r1'],
                'strlist2': retJson['r2'],
                'strlist3': retJson['r3'],
            }
            check_find(arg)
            bulk.insert({'sent': s[0], 'res1': retJson['r1'], 'res2': retJson['r2'], 'res3': retJson['r3']})
            queued += 1

        cnt += 1
        if cnt % 100 == 0 and queued:
            result = bulk.execute()
            pprint(result)
            bulk = cl.initialize_unordered_bulk_op()
            queued = 0

    if queued:
        result = bulk.execute()
        pprint(result)

    db.logout()
Example #22
0
class Database:
    """Access to the ``articles`` and ``timelines`` collections."""

    def __init__(self, environment='local'):
        """Connect to the ``local`` or ``production`` database.

        For any other ``environment`` an error is printed and the instance
        is left without ``db``, ``articles`` and ``timelines`` attributes.
        """
        # connect to local database
        if environment == 'local':
            self.db = MongoClient('mongodb://localhost:27017')['newstime-dev']
        # connect to production database
        elif environment == 'production':
            self.db = MongoClient(
                'mongodb://ds129428.mlab.com:29428/')['newstime-prd']
            self.db.authenticate('karl', 'karl')  # username and passw
        else:
            print(
                "Error: database environment must either be local or production!"
            )
            return

        self.articles = self.db.articles
        self.timelines = self.db.timelines

    def getArticlesFromTimeline(self, timeline):
        """Return the articles whose ``_id`` is listed in ``timeline['articles']``."""
        return self.articles.find({'_id': {'$in': timeline['articles']}})

    def getWaitlistedArticles(self):
        """Return the articles flagged ``waitlisted``."""
        return self.articles.find({'waitlisted': True})

    def insertTimeline(self, timeline):
        """Insert ``timeline`` and return its new ``_id``."""
        return self.timelines.insert_one(timeline).inserted_id

    def insertArticle(self, article):
        """Insert ``article`` and return its new ``_id``."""
        return self.articles.insert_one(article).inserted_id

    def updateTimeline(self, timeline):
        """Overwrite the stored fields of the timeline with the same ``_id``."""
        self.timelines.update_one({'_id': timeline['_id']}, {"$set": timeline},
                                  upsert=False)

    def updateArticle(self, article):
        """Overwrite the stored fields of the article with the same ``_id``."""
        self.articles.update_one({'_id': article['_id']}, {"$set": article},
                                 upsert=False)

    def isArticleInDatabase(self, link):
        """Return True when an article with this ``link`` is stored."""
        # find_one stops at the first match; the former
        # find().limit(1).count() did not, because count() ignores the limit
        # unless told otherwise. Only _id is fetched as the document itself
        # is not needed.
        return self.articles.find_one({'link': link}, {'_id': 1}) is not None

    def removeArticle(self, article):
        """Remove the article with the same ``_id``."""
        self.articles.remove({'_id': article['_id']})

    def cleanCollections(self):
        """Remove every document from both collections."""
        self.articles.remove({})
        self.timelines.remove({})
Example #23
0
def mongo_auth():
    """Return a database handle derived from ``MONGO_URI``.

    Expected URI shape (example):
    'mongodb://*****:*****@ds125525.mlab.com:25525/heroku_xjf41300'

    For host ``localhost`` the URI is given to the client as is and its
    ``db`` database is returned; otherwise the URI is taken apart by hand
    and the named database is authenticated before being returned.
    """
    without_scheme = MONGO_URI.split('//')[-1]
    segments = without_scheme.split('@')
    address = segments[-1]
    host = address.split(':')[0]
    if host == 'localhost':
        client = MongoClient(MONGO_URI)
        return client.db
    port_text = address.split(':')[-1].split('/')[0]
    port = int(port_text)
    base = address.split('/')[-1]
    userinfo = segments[0].split(':')
    user = userinfo[0]
    passw = userinfo[1]
    connection = MongoClient(host, port, retryWrites=False)
    database = connection[base]
    database.authenticate(user, passw)
    return database
Example #24
0
class MongoDBClient(object):
    """Authenticated database handle configured through environment variables."""

    def __init__(self):
        """Read the ``MONGODB_*`` environment variables and connect.

        A missing variable raises KeyError.
        """
        env = os.environ
        self.__mongodb_username = env['MONGODB_USERNAME']
        self.__mongodb_password = env['MONGODB_PASSWORD']
        self.__mongodb_host = env['MONGODB_HOST']
        self.__mongodb_database_name = env['MONGODB_DATABASE_NAME']
        self.__mongodb_port = env['MONGODB_PORT']
        client = MongoClient(self.__mongodb_host, int(self.__mongodb_port))
        self.__mongodb_db = client[self.__mongodb_database_name]
        self.__mongodb_db.authenticate(
            self.__mongodb_username, self.__mongodb_password)

    def get_mongodb(self):
        """Return the database handle created in ``__init__``."""
        return self.__mongodb_db
Example #25
0
    def get_reviews(self):
        """Collect data for all links in self.courses and store it in MongoDB.

        Once self.courses is populated with a list of links, the links are
        split into chunks; for each chunk the reviews, users and course info
        are collected through a DataHandler and written to the ``Users``,
        ``Courses`` and ``Reviews`` collections. A failing collection step is
        retried indefinitely after renewing the connection.

        Raises:
            Exception: if self.courses is empty.
        """
        dbase = MongoClient()['GolfRecs']
        with open('../application/secrets.yaml', 'r') as secrets_file:
            # safe_load: the secrets file is plain data and must not be able
            # to construct arbitrary objects; newer PyYAML also rejects
            # load() without an explicit Loader.
            secrets = yaml.safe_load(secrets_file)['MongoDB']
        dbase.authenticate(secrets['user'], secrets['pass'])
        if self.courses.size < 1:
            raise Exception(
                "No links exist for retrieving reviews. Either call " +
                "self.get_courses() or set self.courses equal to a list " +
                "of courses you wish to process."
            )
        handler = DataHandler(self.sessions)
        # At least one chunk: fewer than POOL_SIZE * 10 links would otherwise
        # request 0 sections, which array_split rejects.
        n_chunks = max(1, self.courses.size // (POOL_SIZE * 10))
        courses_lists = array_split(self.courses, n_chunks)
        for chunk in courses_lists:
            while True:
                try:
                    userpages, courses, reviews = handler.\
                        get_reviews(dbase, chunk)
                    break
                except Exception as err:  # pylint: disable=W0703
                    print('Exception Occurred: {}'.format(err))
                    renew_connection()
            while True:
                try:
                    users = handler.get_users(dbase, userpages)
                    break
                except Exception as err:  # pylint: disable=W0703
                    print(
                        'Exception Occurred in collecting Users: {}'
                        .format(err)
                    )
                    renew_connection()
            renew_connection()
            data = (users, courses, reviews)
            collections = ['Users', 'Courses', 'Reviews']
            filters = ['Username', 'GA Id', 'Review Id']
            for args in zip(repeat(dbase), collections, data, filters):
                handler.write_documents(*args)
Example #26
0
def main():
    """Crawl every entry stored in the ``wechatkeys`` collection.

    For each entry the header values are reduced to their first element and
    the ``url`` mapping is turned into a ``?k=v&k=v`` query string before
    ``crawl`` is called.
    """
    db = MongoClient(host='10.2.11.234', port=27017)['web_news']
    db.authenticate('uestc', 'mongoDB')
    for entry in db['wechatkeys'].find():
        biz = entry['biz']
        headers = entry['headers']
        for name in headers.keys():
            headers[name] = headers[name][0]
        params = entry['url']
        pairs = [key + '=' + params[key][0] for key in params.keys()]
        # No parameters means an empty string, not a lone '?'.
        query = ('?' + '&'.join(pairs)) if pairs else ''
        crawl(biz, query, headers)
Example #27
0
def main():
    """Connect to MongoDB and the MQTT broker, then idle while callbacks run.

    A MongoDB connection failure is reported but does not stop the MQTT
    client from starting.
    """
    # establishing connection to db
    try:
        connect = MongoClient('10.3.141.1:27017')
        connect.authenticate(mongo_login, mongo_pass)
        print("Connected successfully!!!")
    except Exception:
        # Narrowed from a bare ``except:`` so that KeyboardInterrupt and
        # SystemExit are no longer swallowed here.
        print("Could not connect to MongoDB")

    # Binding of the sensor collections, currently disabled:
    #   sensorDB = connect.sensors
    #   global temp_col, rssi_col, pos_col, ecg_col
    #   temp_col = sensorDB.temp
    #   rssi_col = sensorDB.rssi
    #   pos_col = sensorDB.positionning
    #   ecg_col = sensorDB.ecg

    client = paho.Client("client-001")
    # Bind function to callback
    client.on_message = on_message

    print("connecting to broker ", broker, port)
    client.connect(broker, port)  # connect
    client.loop_start()  # start loop to process received messages
    print("subscribing ")
    client.subscribe("anchor/#")  # subscribe

    # We loop waiting for callbacks
    while True:
        time.sleep(4)
def predictOnStream(predictionModelsPath, bufferSize=1000, streamSrc=getTwitterStream, streamSrcConfig={}, streamProcessor=TweetProcessor):
    '''
    Score streamed tweets with the stored models and bulk-insert them into MongoDB.

    Model files are the ``.pkl`` files directly inside predictionModelsPath:
    names starting with 'c' are loaded as predictors, names starting with 'v'
    as vectorizers; both lists are sorted by name and paired up in order.
    Tweets accepted by streamProcessor.filter() are buffered and, once the
    buffer holds more than bufferSize entries, annotated with
    ``semantic_scores`` / ``semantic_probabilities`` and inserted in one call.

    All classes of streamProcessor need to implement the function, process(), as the common interface.

    NOTE(review): the model files are unpickled with ``load`` -- only point
    predictionModelsPath at trusted files.
    '''
    def _loadModel(fileName):
        # Close each model file as soon as it has been loaded.
        with open(predictionModelsPath + fileName, 'rb') as modelFile:
            return load(modelFile)

    # Walk the directory once and reuse the listing for both model kinds.
    pklNames = [fName for fName in next(os.walk(predictionModelsPath))[2] if fName.endswith('.pkl')]
    predictorNames = sorted(fName for fName in pklNames if fName[0] == 'c')
    vectorizerNames = sorted(fName for fName in pklNames if fName[0] == 'v')
    predictors = [_loadModel(pName) for pName in predictorNames]
    tfIdfVectorizers = [_loadModel(vName) for vName in vectorizerNames]
    vecAndPredTuple = tuple(izip(tfIdfVectorizers, predictors))
    db = MongoClient('ds051459.mongolab.com', 51459)['tanav']
    db.authenticate('user', 'password')
    tweetsColl = db.tweets

    with streamSrc(streamSrcConfig) as stream:
        print('Stream established.')
        processedTweetBuff = []
        rawTxtArr = []

        for t in stream:
            try:
                t = json.loads(t)
            except ValueError:  # Might be caused by the keep-alive new line character described in https://dev.twitter.com/docs/streaming-apis/connecting#Stalls
                continue
            if streamProcessor.filter(t):
                processedT, rawTxt = streamProcessor.process(t)
                rawTxtArr.append(rawTxt)
                processedTweetBuff.append(processedT)
                if len(processedTweetBuff) > bufferSize:
                    # append semantic scores; a real list is built so the
                    # array functions below get a sequence on any Python version
                    resultsProba = [pred.predict_proba(vec.transform(array(rawTxtArr))) for vec, pred in vecAndPredTuple]
                    starRankings = (argmax(resultsProba, 2) + 1.0).T.tolist()
                    probabilities = amax(resultsProba, 2).T.tolist()
                    for tweet, starR, proba in izip(processedTweetBuff, starRankings, probabilities):
                        tweet['semantic_scores'] = starR
                        tweet['semantic_probabilities'] = proba
                    tweetsColl.insert(processedTweetBuff)  # bulk insert into MongoDB
                    processedTweetBuff[:] = []  # empty the buffer
                    rawTxtArr[:] = []
                    streamProcessor.idSet.clear()
                    streamProcessor.maxIdPrevBatch = streamProcessor.maxIdCurrentBatch

    print('Stream terminated')
Example #29
0
def get_connection(env=None):
    """
    Return a MongoDB handle, logging in when user and password are configured.

    The ``MONGO_URI`` and ``MONGO_DB`` environment variables win when both
    are set: the named database of that URI is returned without any explicit
    authentication. Otherwise ``env`` (or ``default_env``) is used.

    :param env: dictionary, example:
            {'host': 'ds045454.mongolab.com',
             'tz_aware': True,
             'max_pool_size': 10,
             'port': 45454}
            The optional keys ``db``, ``user`` and ``password`` are removed
            from a copy before the rest is passed to MongoClient.
    :return: the client, or the ``db`` database of it when ``db`` is given
    """
    # environment variables take precedence
    mongo_uri = os.getenv("MONGO_URI")
    mongo_db = os.getenv("MONGO_DB")
    if mongo_uri and mongo_db:
        client = MongoClient(mongo_uri)
        log.info("MongoDB environment variables found: %s!" % mongo_uri)
        return client[mongo_db]

    # fall back to the configuration dictionary (copied, so the caller's
    # dictionary is left untouched)
    settings = (env or default_env).copy()
    dbname = settings.pop('db', None)
    user = settings.pop('user', None)
    password = settings.pop('password', None)

    handle = MongoClient(**settings)
    if dbname:
        log.debug('using db={0}'.format(dbname))
        handle = getattr(handle, dbname)

    if not (user and password):
        return handle

    # if this fails it throws an exception which will be handled in run_stubo.py
    handle.authenticate(user, password)
    log.info("Login to MongoDB successful!")
    return handle
Example #30
0
def get_connection(env=None):
    """
    Return a MongoDB handle, logging in when user and password are configured.

    When both the ``MONGO_URI`` and ``MONGO_DB`` environment variables are
    set, the named database of that URI is returned right away. Otherwise
    the connection details come from ``env`` (or ``default_env``).

    :param env: dictionary, example:
            {'host': 'ds045454.mongolab.com',
             'tz_aware': True,
             'max_pool_size': 10,
             'port': 45454}
            ``db``, ``user`` and ``password`` entries, if present, are taken
            out of a copy and the remainder is passed to MongoClient.
    :return: the client, or the ``db`` database of it when ``db`` is given
    """
    # checking for environment variables first
    uri_from_env = os.getenv("MONGO_URI")
    db_from_env = os.getenv("MONGO_DB")
    if uri_from_env and db_from_env:
        client = MongoClient(uri_from_env)
        log.info("MongoDB environment variables found: %s!" % uri_from_env)
        return client[db_from_env]

    # not configured through the environment: use the configuration details
    options = (env or default_env).copy()
    dbname = options.pop("db", None)
    user = options.pop("user", None)
    password = options.pop("password", None)

    connection = MongoClient(**options)
    if dbname:
        log.debug("using db={0}".format(dbname))
        connection = getattr(connection, dbname)

    wants_login = bool(user and password)
    if wants_login:
        # a failure here throws an exception which will be handled in run_stubo.py
        connection.authenticate(user, password)
        log.info("Login to MongoDB successful!")

    return connection
    def getMaintenance(self):
        """Publish the fsync-lock state and this host's replica-set settings.

        ``mongodb.fsync-locked`` is always published. ``mongodb.priority``
        and ``mongodb.hidden`` are published only when a replica-set member
        whose ``host`` contains the local host name is found; with several
        matches the last one wins.
        """
        db = MongoClient(self.mongo_host, self.mongo_port)

        if self.mongo_user and self.mongo_password:
            # NOTE(review): authenticate() is invoked on the client object, as
            # in the original -- confirm the installed pymongo supports this.
            db.authenticate(self.mongo_user, self.mongo_password)

        host_name = socket.gethostname()

        fsync_locked = int(db.is_locked)

        config = db.admin.command("replSetGetConfig", 1)
        found = False
        priority = hidden = None
        for member in config['config']['members']:
            if host_name in member['host']:
                priority = member['priority']
                hidden = int(member['hidden'])
                found = True

        self.addMetrics('mongodb.fsync-locked', fsync_locked)
        if found:
            # Previously an unmatched host left these names unbound and the
            # whole method crashed before publishing anything.
            self.addMetrics('mongodb.priority', priority)
            self.addMetrics('mongodb.hidden', hidden)
Example #32
0
class MongoAgent(object):
    """Database handle plus shortcuts to the calls/segments/sentences collections."""

    def __init__(self, host, port, dbname, username, password):
        """Connect to ``dbname``; authenticate unless user or password is empty."""
        client = MongoClient(host, port)
        self.db = client[dbname]
        needs_login = username != "" and password != ""
        if needs_login:
            self.db.authenticate(username, password)

        self.calls_col = self.db["calls"]
        self.segments_col = self.db["segments"]
        self.sentences_col = self.db["sentences"]

    def iter_all(self, c, handler):
        """Feed each document of ``c`` to ``handler`` until it returns a falsy value."""
        for document in c.find():
            if handler(document):
                continue
            return

    def find_one_by_key_value(self, c, key, value):
        """Return the result of ``c.find_one`` for the filter ``{key: value}``."""
        criteria = {key: value}
        return c.find_one(criteria)

    def coll(self, name):
        """Return the collection called ``name``."""
        return self.db[name]
Example #33
0
def load_config(config):
    """Initialise the module-level ``_db`` (and optionally ``_es``) handles once.

    ``config`` is a mapping with ``host``, ``port`` and ``name`` for MongoDB.
    When it carries a non-empty ``user``, ``config["password"]`` is used to
    authenticate. An ``elasticsearch`` entry additionally creates the
    Elasticsearch client. Handles that are already set are left untouched.

    Raises:
        Exception: when MongoDB authentication reports failure.
    """
    global _db
    global _es
    if _db is None:
        _db = MongoClient(host=config["host"], port=config["port"]).get_database(config["name"])
        # ``get`` with a default covers both "key missing" and "empty user";
        # ``dict.has_key`` does not exist on Python 3.
        if config.get("user", "") != "":
            if not _db.authenticate(config["user"], config["password"]):
                raise Exception("Authenticate error ast {0}:{1}".format(config["host"], config["port"]))
    if "elasticsearch" in config:
        # Imported lazily so the dependency is only needed when configured.
        from elasticsearch import Elasticsearch
        if _es is None:
            _es = Elasticsearch(config["elasticsearch"])
Example #34
0
def load_config(config):
    """Initialise the module-level ``_db`` (and optionally ``_es``) handles once.

    ``config`` is a mapping with ``host``, ``port`` and ``name`` for MongoDB.
    When it carries a non-empty ``user``, ``config["password"]`` is used to
    authenticate. An ``elasticsearch`` entry additionally creates the
    Elasticsearch client. Handles that are already set are left untouched.

    Raises:
        Exception: when MongoDB authentication reports failure.
    """
    global _db
    global _es
    if _db is None:
        _db = MongoClient(host=config["host"],
                          port=config["port"]).get_database(config["name"])
        # ``get`` with a default covers both "key missing" and "empty user";
        # ``dict.has_key`` does not exist on Python 3.
        if config.get("user", "") != "":
            if not _db.authenticate(config["user"], config["password"]):
                raise Exception("Authenticate error ast {0}:{1}".format(
                    config["host"], config["port"]))
    if "elasticsearch" in config:
        # Imported lazily so the dependency is only needed when configured.
        from elasticsearch import Elasticsearch
        if _es is None:
            _es = Elasticsearch(config["elasticsearch"])
Example #35
0
class Mongo(Driver):
    """Driver that reads configuration values from a MongoDB collection."""

    def _on_init(self):
        """Connect to the configured database and authenticate."""
        client = MongoClient(self._config.get('host', 'localhost:27017'))
        db_name = str(self._config.get('db', 'env'))
        self._client = client[db_name]

        user = str(self._config.user)
        secret = str(self._config.password)
        self._client.authenticate(user, password=secret)

    def g(self, k):
        """Return the ``Value`` stored under key ``k`` in ``env_conf``.

        With ``rawValue`` set the whole document is wrapped; otherwise its
        ``v`` and ``t`` fields are. Any error during the lookup is logged and
        then treated like a missing key.

        Raises:
            NotExistsError: when no document is found or the lookup failed.
        """
        try:
            doc = self._client.env_conf.find_one({'k': k})

            if doc is not None:
                if not self._config.rawValue:
                    return Value(doc.get('v'), doc.get('t'))
                return Value(doc)
        except BaseException as e:
            logger.critical(e, 'Mongo driver')

        raise NotExistsError("Key not exists: %s" % k)
Example #36
0
class Config(object):
    """Crawler settings: logging, MongoDB handle, OSS bucket and small helpers."""

    def __init__(self):
        """Configure logging and open the MongoDB / OSS handles from the environment."""
        self.MAX_LIMIT = 30
        self.MIN_LIMIT = 20

        log_format = "%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s"
        logging.basicConfig(level=logging.DEBUG,
                            format=log_format,
                            datefmt="%a, %d %b %Y %H:%M:%S")

        # MongoDB connection details all come from environment variables.
        mongo_host = environ.get("MongoHost")
        mongo_port = int(environ.get("MongoPort"))
        client = MongoClient(host=mongo_host, port=mongo_port)
        self.MGO = client[environ.get("MongoDB")]
        self.MGO.authenticate(name=environ.get("MongoUser"),
                              password=environ.get("MongoPass"))

        # Object-storage bucket, also configured through the environment.
        AccessKeyId = environ.get('AccessKeyId')
        AccessKeySecret = environ.get('AccessKeySecret')
        OSS_Endpoint = environ.get('OSS_Endpoint')
        credentials = oss2.Auth(AccessKeyId, AccessKeySecret)
        self.Bucket = oss2.Bucket(credentials, OSS_Endpoint, "python-crawler")

    def Header(self, protocal, host):
        """Build request headers for ``host`` with a random User-Agent."""
        user_agent = random.choice(USER_AGENTS)
        site = protocal + "://" + host
        headers = {}
        headers["User-Agent"] = user_agent
        headers["Origin"] = site
        headers["Host"] = host
        headers["Referer"] = site
        return headers

    def RandomLimit(self):
        """Pick a random integer in ``[MIN_LIMIT, MAX_LIMIT)``."""
        candidates = range(self.MIN_LIMIT, self.MAX_LIMIT)
        return random.choice(candidates)

    def Collection(self, filename):
        """Return ``filename``'s base name without extension and first two characters."""
        stem, _extension = path.splitext(path.basename(filename))
        return stem[2:]
Example #37
0
def get_db(show_detail=False):
    """Return an authenticated handle on ``DB_NAME``, or ``None`` on failure.

    With ``show_detail`` the connection parameters are printed first.
    Several hosts without a replica set name, or a failed login, print a
    message and yield ``None``.
    """
    if show_detail:
        print("Connect to DB: %s/%s" % (','.join(DB_HOST), DB_NAME))
        if DB_REPLSET:
            print("Replica Set: %s" % DB_REPLSET)
        login_line = ("Login with user: %s" % DB_USER) if DB_USER else "Login without user."
        print(login_line)

    # More than one host only makes sense together with a replica set name.
    several_hosts = len(DB_HOST) > 1
    if several_hosts and not DB_REPLSET:
        print('Missing Replica set.')
        return None

    client = MongoClient(host=DB_HOST, replicaset=DB_REPLSET)
    db = client[DB_NAME]

    if DB_USER == '':
        return db
    if db.authenticate(DB_USER, DB_PWD):
        return db
    print('Database auth failed.')
    return None
# Build the database handle from ``settings`` unless one was supplied already.
if not MONGO_CLIENT:
    MONGO_PORT = int(getattr(settings, "MONGO_PORT", 27017))
    MONGO_HOST = getattr(settings, "MONGO_HOST", "localhost")
    MONGO_DB_NAME = getattr(settings, "MONGO_DB_NAME", "test")
    MONGO_DB_USER = getattr(settings, "MONGO_DB_USER", False)
    MONGO_DB_PASSWORD = getattr(settings, "MONGO_DB_PASSWORD", False)

    from pymongo import MongoClient

    MONGO_CLIENT = MongoClient(host=MONGO_HOST, port=MONGO_PORT)

    # From here on MONGO_CLIENT is the *database* handle, not the client.
    MONGO_CLIENT = MONGO_CLIENT[MONGO_DB_NAME]

    # Authentication is optional: both user and password must be configured.
    if MONGO_DB_USER and MONGO_DB_PASSWORD:
        MONGO_CLIENT.authenticate(MONGO_DB_USER, MONGO_DB_PASSWORD)

MONGO_DB_VERSION = MONGO_CLIENT.connection.server_info()["versionArray"]

# ``versionArray`` is a list of integers, so compare (major, minor) as a tuple.
# Joining it into a string raised TypeError, and a float comparison would
# rank 2.10 below 2.2.
if tuple(MONGO_DB_VERSION[:2]) < (2, 2):
    raise ImproperlyConfigured(
        """
        Your mongodb service doesn't support TTL
        http://docs.mongodb.org/manual/tutorial/expire-data/
        """
    )

DB_COLLECTION = MONGO_CLIENT[MONGO_SESSIONS_COLLECTION]

MONGO_SESSIONS_INDEXES = DB_COLLECTION.index_information()
Example #39
0
    def predict(self):
        """Re-run detection for the image ids listed in ``id_path.txt``.

        Loads the Caffe network described by ``self.defining`` (prototxt) and
        ``self.net`` (weights), waiting until the weights file exists. For each
        line of ``id_path.txt`` the first space-separated token is taken as an
        image ``_id``. The matching ``image`` document is fetched and its file
        is resized to roughly 1280x720 pixels of area, written to
        ``plate_buffer/resized_img.jpg`` and passed to ``demo``.

        One line per processed image that has annotated ``points`` is written
        to ``re_predict_result_new.txt``: the id, ``*`` and eight integers
        taken from the first detection, scaled back by the resize factor and
        padded by one pixel, or eight zeros when nothing was detected.
        A failure on one image prints ``we are wrong`` and moves on.
        """
        prototxt = self.defining
        caffemodel = self.net
        anchors = self.anchors

        cfg_from_file(self.cfg)

        print('Using config:')
        pprint.pprint(cfg)

        while not os.path.exists(caffemodel):
            print('Waiting for {} to exist...'.format(caffemodel))
            time.sleep(10)

        caffe.set_mode_gpu()
        caffe.set_device(0)

        net = caffe.Net(prototxt, caffemodel, caffe.TEST)
        net.name = os.path.splitext(os.path.basename(caffemodel))[0]
        print('\n\nLoaded network {:s}'.format(caffemodel))

        db = MongoClient('10.76.0.143', 27017)['plate']
        db.authenticate('plateReader', 'IamReader')

        # Pixel area every image is scaled to before detection.
        width = 1280
        height = 720
        resized_img_name = os.path.join('plate_buffer', 'resized_img.jpg')

        # Context managers close both files even if an image lookup fails.
        with open('id_path.txt', 'r') as bad:
            with open('re_predict_result_new.txt', 'w') as f:
                for idlist in bad:
                    if not idlist.strip():
                        # Blank lines used to crash int('').
                        continue
                    num = int(idlist.split(' ')[0])
                    temp = db.image.find_one({'_id': num})
                    if not temp:
                        continue

                    im_file = temp['path']
                    img = exif.load_exif_jpg(im_file)
                    # Linear factor that brings the image area to width*height.
                    multi = math.sqrt(img.shape[0] * img.shape[1] * 1.0 / (width * height))
                    print(multi)
                    resized_img = cv2.resize(img, (int(img.shape[1] / multi), int(img.shape[0] / multi)))
                    cv2.imwrite(resized_img_name, resized_img)

                    print(im_file)
                    try:
                        presicion = demo(net, resized_img_name, anchors)
                        print(presicion)
                        if len(temp['points']) > 0:
                            fields = [str(num), '*']
                            if len(presicion) != 0:
                                box = presicion[0]
                                # Map back to original pixels, padded by 1.
                                # NOTE(review): element order of ``box`` is
                                # defined by ``demo`` -- confirm.
                                v0 = int(box[0:1] * multi - 1)
                                v1 = int(box[1:2] * multi - 1)
                                v2 = int(box[2:3] * multi + 1)
                                v3 = int(box[3:4] * multi + 1)
                                fields.extend(str(v) for v in (v2, v1, v2, v3, v0, v3, v0, v1))
                            else:
                                fields.extend(['0'] * 8)
                            # Written in one call so a failure above never
                            # leaves a partial record in the file.
                            f.write(' '.join(fields) + '\n')
                    except Exception:
                        # Best effort per image; Ctrl-C still interrupts.
                        print('we are wrong')
Example #40
0
from pymongo import MongoClient, ReturnDocument
#f=open('pld.dat','w')

# Open the 'plate' database and log in.
db = MongoClient('10.76.0.143', 27017).plate
db.authenticate('plateReader', 'IamReader')

# Fetch three image documents by id, printing each one as soon as it is read.
images = db.image
a = images.find_one({'_id': 120762})
print(a)
b = images.find_one({'_id': 524})
print(b)
c = images.find_one({'_id': 1008})
print(c)

# Finally report how many image documents exist.
print(images.count())
#f.close()

Example #41
0
from pymongo import MongoClient

# Default local connection; the credentials live in the "admin" database.
db = MongoClient().get_database("DATA")
db.authenticate("twitterApplication", "gdotwitter", source="admin")

# Show only the retweetedStatus field of the first "Brexit_old" document.
projection = {"retweetedStatus": 1}
first_only = db.get_collection("Brexit_old").find({}, projection).limit(1)
print(first_only[0])
Example #42
0
File: newdb.py Project: gaosh13/esl
        elif c == '(':
            ntr += tmp + '('
            state[0] = 1
            tmp = ''
        else:
            tmp += c

    return ntr


if __name__ == '__main__':
    # index.txt holds a whitespace-separated list of document paths.
    # Read it up front so the handle is closed right away.
    with open("index.txt", "r") as fin:
        text = fin.read()

    addr = '127.0.0.1'
    db = MongoClient(addr).test
    db.authenticate('test', 'test')
    posts = db.syntax2

    index = text.split()
    info = []
    cnt = 0
    for filename in index:
        # Close every document as soon as it has been read; the original
        # leaked one file handle per indexed document.
        with open(filename, "r") as fin1:
            text1 = fin1.read()
        print(filename)
        sentences = parse_text_file(filename, text1)
        cnt += 1
        for s in sentences:
            # Third element of each parsed sentence is turned into a tree,
            # which is then rendered back to a string by make_tree0.
            tr = make_tree(s[2])
            ntr = make_tree0(tr)
Example #43
0
from flask import Flask, render_template, jsonify, request
from random import randint
from pymongo import MongoClient
from bson.objectid import ObjectId

app = Flask(__name__)
# Flask's debug switch is the lowercase ``debug`` attribute; ``app.Debug``
# only created an unused attribute and never enabled debug mode.
# NOTE(review): debug mode must not be left on in a public deployment.
app.debug = True

# Database handle: the URI selects the server, the index selects the database.
db = MongoClient('mongodb://*****:*****@troup.mongohq.com:10079/piloto-tweets')['piloto-tweets']
db.authenticate('yamil', 'medalla')
# Collection all routes below read from and write to.
tweets = db.tweets

@app.route('/', methods = ['GET'])
def view_tweets():
	"""Render the index page with every stored tweet."""
	all_tweets = list(tweets.find())
	return render_template('index.html', tweets = all_tweets)

@app.route('/tweets', methods = ['GET'])
def return_all_tweets():
	"""Return every stored tweet as JSON under the ``tweets`` key.

	Each document's ``_id`` is converted to its string form, because
	``ObjectId`` values cannot be JSON-encoded by ``jsonify``.
	"""
	documents = []
	for document in tweets.find():
		document['_id'] = str(document['_id'])
		documents.append(document)
	return jsonify({"tweets": documents})

@app.route('/tweets', methods = ['POST'])
def create_tweet():
	"""Store the posted ``tweet`` form field and return the new document id."""
	new_tweet = {'text': request.form['tweet']}
	inserted_id = tweets.insert(new_tweet)
	return jsonify({'_id': str(inserted_id)})

@app.route('/tweets/<string:id>', methods = ['GET', 'DELETE'])
def handle_single_tweet(id): # handle a single tweet: delete it or show it
	"""Delete (DELETE) or return (GET) the tweet whose ObjectId is ``id``.

	DELETE answers ``{"n": <number of removed documents>}``.
	GET answers the document with ``_id`` as a string, or a 404 JSON error
	when no such tweet exists.
	"""
	if request.method == 'DELETE':
		# The original wrapped the result in ``{...}``, a set literal holding
		# a dict, which raises TypeError (unhashable type).
		result = tweets.remove({'_id': ObjectId(id)})
		removed = result.get('n', 0) if result else 0
		return jsonify({'n': removed})

	tweet = tweets.find_one({'_id': ObjectId(id)})
	if tweet is None:
		return jsonify({'error': 'tweet not found'}), 404
	# ObjectId is not JSON serializable.
	tweet['_id'] = str(tweet['_id'])
	return jsonify(tweet)
Example #44
0
from pymongo import MongoClient as Connection
from config import DB_HOST, MONGO_AUTHENTICATE

# Handle on the "dragon" database; write acknowledgements time out after 1 ms.
MongoDB = Connection(DB_HOST, wtimeout=1)["dragon"]
# Credentials come from the project config module.
_credentials = MONGO_AUTHENTICATE
MongoDB.authenticate(_credentials['id'], _credentials['passwd'])
Example #45
0
from pprint import pprint
from pymongo import MongoClient
from bson import ObjectId

# NOTE(review): credentials are hard-coded in this script.
dbuser = "******"
dbpasswd = "monitor678"
# Source database on the remote server (authenticated) ...
remote = MongoClient("nuev9.com")['monitor']
remote.authenticate(dbuser, dbpasswd)
# ... and the local database of the same name.
local = MongoClient()['monitor']

# Disabled earlier approach, kept as a bare string literal (never executed).
"""
laccs = set([x['_id'] for x in local.accounts.find()])
accs = remote.accounts.find({"_id": ObjectId("54f4b34941b84c23d147babd")})
#pprint(accs[0])

for acc in accs:
    #if acc['_id'] not in laccs:
    local.accounts.save(acc)
"""

# Template account and the one campaign that every clone receives.
acc = remote.accounts.find_one({"_id": ObjectId("54f4b34941b84c23d147babd")})
campaign = acc["campaigns"]["54f4b32141b84c23d147babc"]
for x in range(9,10):
    # Copy the template: ``newacc = acc`` only aliased it, so every iteration
    # would have rewritten the same dict (and the fetched template itself).
    # A shallow copy is enough because only top-level keys are replaced.
    newacc = dict(acc)
    newacc["_id"] = ObjectId()
    newacc["campaigns"] = {"iae%s"%x: campaign}
    newacc["name"] = "INSIGHT-DRINKS-IAE-GRUPO-%s" % x
Example #46
0
    def predict(self):
        """Run detection over image ids 1..103470 and log truth vs. prediction.

        Loads the Caffe network described by ``self.defining`` (prototxt) and
        ``self.net`` (weights), waiting until the weights file exists. For each
        existing ``image`` document the file at ``path`` is passed to ``demo``.

        One line per image with annotated ``points`` is written to
        ``predict_result1.txt``: the id, the x/y of the first four annotated
        points, ``*`` and eight integers from the first detection, or eight
        zeros when nothing was detected. A failure on one image prints
        ``we are wrong`` and moves on.
        """
        prototxt = self.defining
        caffemodel = self.net
        anchors = self.anchors

        cfg_from_file(self.cfg)

        print('Using config:')
        pprint.pprint(cfg)

        while not os.path.exists(caffemodel):
            print('Waiting for {} to exist...'.format(caffemodel))
            time.sleep(10)

        caffe.set_mode_gpu()
        caffe.set_device(0)

        net = caffe.Net(prototxt, caffemodel, caffe.TEST)
        net.name = os.path.splitext(os.path.basename(caffemodel))[0]
        print('\n\nLoaded network {:s}'.format(caffemodel))

        db = MongoClient('10.76.0.143', 27017)['plate']
        db.authenticate('plateReader', 'IamReader')
        # Highest id scanned is hard-coded; the collection count that used to
        # be fetched first was overwritten immediately and never used.
        total_num = 103470

        # The context manager flushes and closes the result file; the original
        # never closed it.
        with open('predict_result1.txt', 'w') as f:
            for num in range(total_num):
                temp = db.image.find_one({'_id': num + 1})
                if not temp:
                    continue

                im_file = temp['path']
                print(im_file)
                try:
                    presicion = demo(net, im_file, anchors)
                    print(presicion)
                    if len(temp['points']) > 0:
                        fields = [str(num + 1)]
                        # x and y of the first four annotated points.
                        corners = temp['points'][0]
                        for k in range(4):
                            fields.append(str(corners[k][0]))
                            fields.append(str(corners[k][1]))
                        fields.append('*')
                        if len(presicion) != 0:
                            box = presicion[0]
                            # NOTE(review): element order of ``box`` is
                            # defined by ``demo`` -- confirm.
                            p0 = int(box[0:1])
                            p1 = int(box[1:2])
                            p2 = int(box[2:3])
                            p3 = int(box[3:4])
                            fields.extend(str(p) for p in (p2, p1, p2, p3, p0, p3, p0, p1))
                        else:
                            fields.extend(['0'] * 8)
                        # Written in one call so a failure above never leaves
                        # a partial record in the file.
                        f.write(' '.join(fields) + '\n')
                except Exception:
                    # Best effort per image; Ctrl-C still interrupts.
                    print('we are wrong')
Example #47
0
class DotaDB:
    """MongoDB-backed cache in front of the Dota 2 web API (``DotaApi``).

    Players, matches and the hero list are looked up in the ``dota2``
    database first and fetched through ``self.dotaApi`` when they are
    missing or older than the per-method age limit.
    """

    def __init__(self):
        self.db = MongoClient().dota2
        self.db.authenticate("<REPLACE WITH YOUR MONGO USER>","<REPLACE WITH YOUR MONGO PASSWORD>")
        self.matches = self.db.matches
        self.heroes = self.db.heroes
        self.players = self.db.players
        self.dotaApi = DotaApi()

    def _playerFor(self,vanityName):
        """Return the stored player document, creating it when missing."""
        player = self.players.find_one({"name":vanityName})
        if not player:
            player = self.createPlayer(vanityName)
        return player

    def vanityTo64(self,vanityName):
        """Return the 64-bit steam id of ``vanityName`` as an int."""
        return int(self._playerFor(vanityName)["steam64"])

    def vanityTo32(self,vanityName):
        """Return the 32-bit steam id of ``vanityName`` as an int."""
        return int(self._playerFor(vanityName)["steam32"])

    def steam32ToVanity(self,steam32,useApi=True):
        """Return the display name for a 32-bit steam id.

        ``4294967295`` maps to ``"Unknown"``. Unknown ids are resolved
        through the API unless ``useApi`` is false; ``False`` is returned
        when no name can be found.
        """
        if steam32 == 4294967295:
            return "Unknown"
        name = self.players.find_one({"steam32": steam32})
        if name:
            return name["name"]
        if not useApi:
            return False
        apiName = self.dotaApi.getPlayerBySteam32(steam32)
        if apiName:
            return apiName["response"]["players"][0]["personaname"]
        return False

    def heroIdtoName(self,id):
        """Return the localized name of hero ``id``, or ``None`` if unknown.

        The cached hero list is refreshed from the API when it is missing or
        older than one day (86400 s). A failed refresh keeps the old list.
        """
        heroes = self.heroes.find_one()
        if not heroes or int(time.time()) - heroes["lastUpdate"] > 86400:
            res = self.dotaApi.getheroes()
            if "error" not in res:
                self.heroes.remove()
                self.heroes.save({"lastUpdate": int(time.time()), "data": res})
                heroes = self.heroes.find_one()
        if not heroes:
            # Nothing cached and the API failed: indexing None used to crash.
            return None
        for heroe in heroes["data"]["result"]["heroes"]:
            if heroe["id"] == id:
                return heroe["localized_name"]
        return None

    def createPlayer(self,vanityName,steam64=None):
        """Create or replace the player document for ``vanityName``.

        With ``steam64`` the 32-bit id is derived locally; otherwise both ids
        come from ``dotaApi.getPlayerByName``. Returns the stored document.
        """
        if steam64:
            apiPlayer = {"64bits":steam64,"32bits":int(steam64)-76561197960265728}
        else:
            apiPlayer = self.dotaApi.getPlayerByName(vanityName)
        # Remove the exact name as well as the lowercase one. The original
        # removed only the lowercase form but saved the exact form, so
        # re-creating a mixed-case player piled up duplicate documents.
        self.db.players.remove({"name": {"$in": [vanityName, vanityName.lower()]}})
        self.db.players.save({"name":vanityName,"lastUpdated":int(time.time()),"steam64":apiPlayer["64bits"],"steam32":apiPlayer["32bits"]})
        return self.db.players.find_one({"name":vanityName})

    def getMatch(self,matchId):
        """Return the ``result`` of a match, fetching and caching it if needed.

        Returns ``None`` when the API answers with an ``error`` entry.
        """
        match = self.matches.find_one({"match_id":matchId})
        if not match:
            apiMatch = self.dotaApi.getMatch(matchId)
            if "error" in apiMatch:
                return None
            self.matches.save({"match_id":matchId,"match_data": apiMatch})
            match = self.matches.find_one({"match_id":matchId})
        return match["match_data"]["result"]

    def getMatches(self,vanityName):
        """Return the player's recent matches, cached for one hour (3600 s).

        Returns ``None`` when the API answers with an ``error`` entry.
        """
        player = self.players.find_one({"name":vanityName})
        if player and "matches" in player and int(time.time()) - player["matches"]["lastUpdate"] < 3600:
            return player["matches"]["data"]
        matches = self.dotaApi.getmatches(self.vanityTo32(vanityName),None,5)
        if isinstance(matches, dict) and "error" in matches:
            return None
        self.players.update({"name":vanityName},{"$set": {"matches": { "data": matches["matches"],"lastUpdate": int(time.time())}}})
        return self.players.find_one({"name":vanityName})["matches"]["data"]

    def getFullMatchesList(self,vanityName,download=True):
        """Return the stored list of the player's match ids, newest first.

        With ``download`` the list is first extended page by page (25 matches
        per request) until the API reports nothing remaining, a page is
        empty, or an already known match shows up. The merged list is then
        stored. Returns ``False`` when an API request yields nothing.
        """
        player = self.players.find_one({"name":vanityName})
        if player and "fullMatchesList" in player:
            known = player["fullMatchesList"]
        else:
            known = []
        if not download:
            return known
        needMore = True
        startingMatch = None
        while needMore:
            apiRes = self.dotaApi.getmatches(self.vanityTo32(vanityName),startingMatch,25)
            if not apiRes:
                return False
            if apiRes["results_remaining"] == 0:
                needMore = False
            apiList = [x["match_id"] for x in apiRes["matches"]]
            if not apiList:
                # An empty page used to hit an unbound loop variable (or
                # repeat the same request forever); stop paging instead.
                break
            # Set built once per page; membership is tested against the ids
            # known *before* this page, exactly like the original count().
            knownIds = set(known)
            newList = []
            for matchId in apiList:
                if matchId in knownIds:
                    needMore = False
                else:
                    newList.append(matchId)
            # The next page starts at the last id of this one.
            startingMatch = apiList[-1]
            known = newList + known
        self.players.update({"name":vanityName},{"$set" : {"fullMatchesList": known }})
        return known

    def matchGamesList(self,list):
        """Return a cursor over the stored matches whose id is in ``list``."""
        return self.matches.find({"match_id": {"$in": list}})

    def wipeGames(self,list):
        """Remove the stored matches whose id is in ``list``."""
        return self.matches.remove({"match_id": {"$in": list}})

    def delUser(self,vanityName):
        """Delete the player document for ``vanityName``; always returns True."""
        self.players.remove({"name":vanityName})
        return True
Example #48
0
class Crawler(Spider):
    """Spider that re-crawls Google Scholar profile pages through HTTP proxies.

    Profiles come from a MongoDB collection named in the ``conf`` file. Only
    profiles that ``isVaild`` rejects (no token, or a token older than 50
    days) are queued. Every request goes out through a proxy drawn from a
    pool that is refilled from a proxy-list service.
    """

    name = "2"
    allowed_domains = ["scholar.google.com"]

    def __init__(self):
        self.db = MongoClient("166.111.7.105", 30017)["bigsci"]
        self.db.authenticate("", "")

        # conf: first line is an integer limit, second line is the name of
        # the profile collection. The handle is closed right after reading.
        with open("conf") as conf_file:
            conf = conf_file.readlines()
        profile_db = conf[1].strip()
        limit = int(conf[0].strip())  # parsed (and validated) but not used below
        self.profile = self.db[profile_db]
        self.start_urls = []
        self._id = {}
        self.count = 0
        t = datetime.now()
        tmp = {
            "year": t.year,
            "month": t.month,
            "day": t.day,
            "hour": t.hour,
            "minute": t.minute,
            "second": t.second,
            "microsecond": t.microsecond,
        }
        self.fr = list(self.profile.find({}, {"_id": 1, "ID": 1, "url": 1, "token": 1}))
        random.shuffle(self.fr)
        self.length = len(self.fr)
        print("Total author count: %d" % self.length)
        # self.entry / self.cnt are instance state because isVaild reads
        # self.entry.
        self.cnt = 0
        while self.cnt < self.length:
            self.entry = self.fr[self.cnt]
            if not self.isVaild(tmp):
                self._id[self.entry["ID"]] = self.entry["_id"]
                url = self.entry["url"] + "&cstart=0&pagesize=100"
                self.start_urls.append(url)
            self.cnt += 1
        print("total url count: %d" % len(self.start_urls))

        self.proxies = []
        self.request20proxy = "http://erwx.daili666.com/ip/?tid=558045424788230&num=20&foreign=only"
        self.request1proxy = "http://erwx.daili666.com/ip/?tid=558045424788230&num=1&foreign=only"
        proxy = urllib.urlopen(self.request20proxy)
        for line in proxy.readlines():
            self.proxies.append("http://" + line.strip())

    def choose_proxy(self):
        """Return a random proxy from the pool, replacing it first if it is dead."""
        # Index by the real pool size: the service may return fewer than the
        # 20 entries the original hard-coded range assumed.
        idx = random.randrange(len(self.proxies))
        if not self.test_proxy(self.proxies[idx]):
            proxy = urllib.urlopen(self.request1proxy)
            self.proxies[idx] = "http://" + proxy.readlines()[0].strip()
            print("Proxy " + self.proxies[idx] + " is added.")
        return self.proxies[idx]

    def test_proxy(self, proxy):
        """Return True when Google Scholar answers HTTP 200 through ``proxy``."""
        socket.setdefaulttimeout(3.0)
        test_url = "http://scholar.google.com"
        try:
            # NOTE(review): the ":@" prefix on the proxy URL looks unusual -- confirm.
            f = urllib.urlopen(test_url, proxies={"http": ":@" + proxy})
        except Exception:
            print("Proxy " + proxy + " fails!")
            return False
        # getcode() returns an int; comparing with the string "200" was
        # always unequal, so every proxy was reported as failing.
        if f.getcode() != 200:
            print("Proxy " + proxy + " fails!")
            return False
        return True

    def make_requests_from_url(self, url):
        """Build the initial request for ``url`` routed through a proxy."""
        request = Request(url, callback=self.parse)
        request.meta["proxy"] = self.choose_proxy()
        request.headers["Proxy-Authorization"] = ""
        return request

    def parse(self, response):
        """Yield one item with the publications on this page, then the next page.

        A follow-up request (``cstart`` advanced by 100) is yielded only when
        the page held exactly 100 publication rows.
        """
        sel = Selector(response)

        url = response.url
        # The 12 characters after "user=" are used as the profile ID key.
        idx = url.find("user")
        _id = url[idx + 5 : idx + 17]

        item = CrawlerItem()
        item["url"] = url
        item["_id"] = self._id[_id]
        t = datetime.now()
        item["token"] = {
            "year": t.year,
            "month": t.month,
            "day": t.day,
            "hour": t.hour,
            "minute": t.minute,
            "second": t.second,
            "microsecond": t.microsecond,
        }
        tmp = sel.xpath('//tbody[@id="gsc_a_b"]/tr[@class="gsc_a_tr"]/td[@class="gsc_a_t"]/a/text()').extract()
        item["pubs"] = []
        n = len(tmp)
        for i in range(1, n + 1):
            pub = {}
            pub["title"] = sel.xpath(
                '//tbody[@id="gsc_a_b"]/tr[@class="gsc_a_tr"][%d]/td[@class="gsc_a_t"]/a/text()' % i
            ).extract()
            pub["url"] = sel.xpath(
                '//tbody[@id="gsc_a_b"]/tr[@class="gsc_a_tr"][%d]/td[@class="gsc_a_t"]/a/@href' % i
            ).extract()
            pub["author"] = sel.xpath(
                '//tbody[@id="gsc_a_b"]/tr[@class="gsc_a_tr"][%d]/td[@class="gsc_a_t"]/div[1]/text()' % i
            ).extract()
            pub["venue"] = sel.xpath(
                '//tbody[@id="gsc_a_b"]/tr[@class="gsc_a_tr"][%d]/td[@class="gsc_a_t"]/div[2]/text()' % i
            ).extract()
            pub["citation"] = sel.xpath(
                '//tbody[@id="gsc_a_b"]/tr[@class="gsc_a_tr"][%d]/td[@class="gsc_a_c"]/a/text()' % i
            ).extract()
            pub["year"] = sel.xpath(
                '//tbody[@id="gsc_a_b"]/tr[@class="gsc_a_tr"][%d]/td[@class="gsc_a_y"]/span/text()' % i
            ).extract()
            item["pubs"].append(pub)
        self.count += 1
        print("current count: %d,total count: %d" % (self.count, self.length))
        yield item

        if n == 100:
            # A full page: read the current cstart value from the URL and ask
            # for the next 100 rows.
            offset = 0
            d = 0
            idx = url.find("cstart=")
            idx += 7
            while idx < len(url) and url[idx].isdigit():
                offset = offset * 10 + int(url[idx])
                idx += 1
                d += 1
            request = Request(url[: idx - d] + str(offset + 100) + "&pagesize=100", callback=self.parse)
            request.meta["proxy"] = self.choose_proxy()
            request.headers["Proxy-Authorization"] = ""
            yield request

    def isVaild(self, new):
        """Return True when ``self.entry`` was updated at most 50 days before ``new``.

        ``new`` and the entry's ``token`` are dicts of datetime components.
        A missing entry or token yields False (the profile needs crawling).
        """
        if self.entry is None:
            print("Invaild item")
            return False
        if "token" not in self.entry:
            print("No token")
            return False
        t = self.entry["token"]
        old = datetime(t["year"], t["month"], t["day"], t["hour"], t["minute"], t["second"], t["microsecond"])
        t = new
        new = datetime(t["year"], t["month"], t["day"], t["hour"], t["minute"], t["second"], t["microsecond"])
        d = new - old
        print("Have passed %d days after last update" % d.days)
        return d.days <= 50
import sys
from pymongo import MongoClient
from Crypto.Hash import SHA256

#####################################
# main program                      #
#####################################

# Command line: <mongo host[:port]> <password of the 'eae' db user> <admin password>
mongoURL = sys.argv[1]
mongoEAEPassword = sys.argv[2]
# NOTE(review): adminPwd is read but never used; the stored password below is
# the SHA-256 of the literal "eae" -- confirm this is intended.
adminPwd = sys.argv[3]

# Hash objects take bytes; a bytes literal works on Python 2 and 3 alike.
# Named ``hasher`` so the ``hash`` builtin is not shadowed.
hasher = SHA256.new()
hasher.update(b"eae")

db = MongoClient('mongodb://' + mongoURL + '/').eae
db.authenticate('eae', mongoEAEPassword, mechanism='SCRAM-SHA-1')

adminUser = {"username": "******",
             "password": hasher.hexdigest()
             }

# Seed the users collection with the admin account.
db.users.insert_one(adminUser)
Example #50
0
# -*- coding: utf-8 -*-

from pymongo import MongoClient

# Local "billionaire" database; credentials are asked for interactively.
db = MongoClient("localhost", 27017)["billionaire"]
name = raw_input("name: ")
pw = raw_input("pw: ")
db.authenticate(name, pw)

# Make sure both collections are indexed on "uid", printing each result.
for collection in (db.gift, db.sponsor):
    print(collection.ensure_index("uid"))
Example #51
0
class setting:
  def __init__(self,cfg):
    """Keep the configuration and open an authenticated database handle.

    `cfg` must provide the keys 'db_ip', 'db_port', 'db_user' and 'db_pass'.
    The instance starts in the logged-out state.
    """
    self.cfg = cfg
    self.logined = False
    connection = MongoClient(cfg['db_ip'],cfg['db_port'])
    # Despite its name, self.client is the `bot` database, not the MongoClient.
    self.client = connection.bot
    self.client.authenticate(cfg['db_user'],cfg['db_pass'])
  def login(self,user,password,id):
     if self.logined:
       return True
     userdatas = self.client.user.find({"name":user})
     if userdatas.count() == 0:
       return False
     else:
       currect = userdatas[0]['pass']
       import hashlib
       m = hashlib.md5()
       m.update(currect + str(id))
       currect = m.hexdigest()
       if currect == password:
         self.user = user
         self.logined = True
         return True
       else:
         return False
  def getdata(self):
    if self.logined:
      userdata = self.client.user.find({"name":self.user})[0]
      return userdata['data']
    else:
      return None
  def cleck_user(self,name):
    userdatas = self.client.user.find({"name":name})
    if userdatas.count() == 0:
      return False
    else:
      return True
  def register(self,name):
    self.client.user.insert_one({
      'name': name
    })
  def change_nick(self,who,twitchid,nick):
    
    try:
      nick_userdata = self.client.user.find({"name": who})[0]['data']['spe_name']
    except:
      self.client.user.update_one({"name": who},{"$set":{"data.spe_name":[]}})
      nick_userdata = self.client.user.find({"name": who})[0]['data']['spe_name']
    exist = False
    for nickdata in nick_userdata:
      if nickdata['name'] == twitchid:
        nickdata['nick'] = nick
        exist = True
        break
    if not exist:
      nick_userdata.append({
        'name': twitchid,
        'nick': nick
      })
    self.client.user.update_one({"name": who},{'$set':{'data.spe_name':nick_userdata}})
    return exist
  def rm_nick(self,who,twitch):
    try:
      nick_userdata = self.client.user.find({"name": who})[0]['data']['spe_name']
    except:
      self.client.user.update_one({"name": who},{"$set":{"data.spe_name":[]}})
      nick_userdata = self.client.user.find({"name": who})[0]['data']['spe_name']
    exist = False
    for i in xrange(len(nick_userdata)):
      if nick_userdata[i]['name'] == twitch:
        nick_userdata.pop(i)
        exist = True
        break
    self.client.user.update_one({"name": who},{'$set':{'data.spe_name':nick_userdata}})
    return exist
  def nick(self,room,id):
    try:
      nick_userdata = self.client.user.find({"name": room})[0]['data']['spe_name']
      for nickdata in nick_userdata:
        if nickdata['name'] == id:
          return nickdata['nick']
          break
      return id
    except:
      return id
  def token_check(self,user,token):
    userdata = self.client.user.find({"name": user,"room_token": token})
    if userdata.count() == 0:
      return False
    else:
      return True
  def get_room_token(self,user):
    userdata = self.client.user.find({"name": user})
    if userdata[0].has_key('room_token'):
      return userdata[0]['room_token']
    else:
      token = self.rd(50)
      self.client.user.update_one({"name": user},{'$set':{'room_token':token}})
      return token
  def rd(self,N):
    import random,string
    return ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(N))
  def canEditNickInRoom(self,user):
    userdata = self.client.user.find({"name": user})
    if userdata[0].has_key('room_edit_nick'):
      return userdata[0]['room_edit_nick']
    else:
      self.client.user.update_one({"name": user},{'$set':{'room_edit_nick':False}})
      return False
  def setEditNickInRoom(self,user,value):
    self.client.user.update_one({"name": user},{'$set':{'room_edit_nick':value}})
  def cando(self,user,dowhat):
    userdata = self.client.user.find({"name": user})
    if userdata[0].has_key(dowhat):
      return userdata[0][dowhat]
    else:
      self.client.user.update_one({"name": user},{'$set':{dowhat:False}})
      return False
  def setCanDo(self,user,dowhat,value):
    self.client.user.update_one({"name": user},{'$set':{dowhat:value}})