Beispiel #1
0
def savenumbers(numbers=None):
    """Save each number into the ``UserWhiteList`` collection.

    Every number is written as the document ``{'_id': number, 'valid': 1}``.
    A failure on one number is logged and does not prevent the remaining
    numbers from being processed.

    Args:
        numbers: Sized collection of values used as document ``_id``.
            ``None`` (the default) is treated as an empty collection.

    Returns:
        Tuple ``(itemsLen, successLen)``: the number of values received and
        the number that were saved without raising.
    """
    numbers = [] if numbers is None else numbers
    items_len = len(numbers)
    success_len = 0
    client = MongoClient(configureRead.getDBValue('mongodb', 'db_url'))
    try:
        db = client[configureRead.getDBValue('mongodb', 'db_name')]
        table_name = 'UserWhiteList'

        for number in numbers:
            try:
                db[table_name].save({'_id': number, 'valid': 1})
            except Exception as error:
                # Best effort: log the failure and move on to the next number.
                logger.error(str(error) + " savenumbers :")
            else:
                success_len += 1
    finally:
        # Release the connection even if something unexpected escapes.
        client.close()
    return (items_len, success_len)
Beispiel #2
0
def find_newsthumbnail():
    """Find recent news-thumbnail records that have no image URL.

    Queries the collection named by the ``db_table_newsthumbnail`` setting
    for documents where ``imageUrl`` is ``None``, ``type`` is 1 and
    ``createDate`` is no older than one day before ``datetime.utcnow()``.

    Returns:
        List of ``Item.Item`` objects, one per matching document, with
        ``URL`` set to the document's ``_id``.
    """
    client = MongoClient(configureRead.getDBValue('mongodb', 'db_url'))
    try:
        db = client[configureRead.getDBValue('mongodb', 'db_name')]
        table_name = configureRead.getDBValue('mongodb',
                                              'db_table_newsthumbnail')
        start = datetime.utcnow() - timedelta(days=1)
        contents = db[table_name].find({
            'imageUrl': None,
            'type': 1,
            'createDate': {
                '$gte': start
            }
        })
        items = []
        for content in contents:
            item = Item.Item()
            item.URL = content.get("_id")
            items.append(item)
        return items
    finally:
        # The loop above has already drained the cursor, so the connection
        # can be released here on both the success and the error path.
        client.close()
Beispiel #3
0
def saveSiteThumbnail(pages=None, time_type='UTC'):
    """Save one site-thumbnail document per page.

    Each page is written to the collection named by the
    ``db_table_newsthumbnail`` setting with ``_id`` set to ``str(page.URL)``
    and ``type`` set to 0. A failure on one page is logged and does not
    prevent the remaining pages from being processed.

    Args:
        pages: Sized collection of page objects providing ``URL``,
            ``image_path``, ``digest``, ``Name``, ``SiteURL`` and
            ``shardkey``. ``None`` (the default) is treated as empty.
        time_type: Not read by this function; kept so existing callers
            that pass it keep working.

    Returns:
        Tuple ``(itemsLen, successLen)``: the number of pages received and
        the number that were saved without raising.
    """
    pages = [] if pages is None else pages
    items_len = len(pages)
    success_len = 0
    client = MongoClient(configureRead.getDBValue('mongodb', 'db_url'))
    try:
        db = client[configureRead.getDBValue('mongodb', 'db_name')]
        table_name = configureRead.getDBValue('mongodb',
                                              'db_table_newsthumbnail')
        logger.info('save size :' + str(items_len))

        for page in pages:
            try:
                db[table_name].save({
                    '_id': str(page.URL),
                    'imageUrl': page.image_path,
                    'digest': cgi.escape(page.digest),
                    'title': cgi.escape(page.Name),
                    'type': 0,
                    'siteUrl': cgi.escape(page.SiteURL),
                    'fetchDate': utcTime.getCurrentTime(),
                    'shardkey': int(page.shardkey)
                })
            except Exception as error:
                # The failure may be a missing attribute on the page itself,
                # so read the URL defensively to keep the handler from
                # raising a second error.
                url = getattr(page, 'URL', '') if page else ''
                logger.error("%s saveThumbnail :%s" % (error, url))
            else:
                success_len += 1
    finally:
        # Release the connection even if something unexpected escapes.
        client.close()
    return (items_len, success_len)
Beispiel #4
0
def update_not_handled_Thumbnail(pages=None, time_type='UTC'):
    """Set ``type`` to 2 on the thumbnail document of each page.

    For every page, the document whose ``_id`` equals ``str(page.URL)`` in
    the collection named by the ``db_table_newsthumbnail`` setting gets
    ``{'$set': {'type': 2}}`` applied. A failure on one page is logged and
    does not prevent the remaining pages from being processed.

    Args:
        pages: Sized collection of page objects providing ``URL``.
            ``None`` (the default) is treated as empty.
        time_type: Not read by this function; kept so existing callers
            that pass it keep working.

    Returns:
        Tuple ``(itemsLen, successLen)``: the number of pages received and
        the number of update calls that completed without raising.
    """
    pages = [] if pages is None else pages
    items_len = len(pages)
    success_len = 0
    client = MongoClient(configureRead.getDBValue('mongodb', 'db_url'))
    try:
        db = client[configureRead.getDBValue('mongodb', 'db_name')]
        table_name = configureRead.getDBValue('mongodb',
                                              'db_table_newsthumbnail')
        logger.info('save size :' + str(items_len))

        for page in pages:
            try:
                # NOTE(review): the positional False/True look like the mongo
                # shell's (upsert, multi); PyMongo's legacy update() may bind
                # the 4th positional to a different flag - confirm intent.
                db[table_name].update({'_id': str(page.URL)},
                                      {"$set": {
                                          'type': 2
                                      }}, False, True)
            except Exception as error:
                # The failure may be a missing attribute on the page itself,
                # so read the URL defensively to keep the handler from
                # raising a second error.
                url = getattr(page, 'URL', '') if page else ''
                logger.error(
                    "%s update_not_handled_Thumbnail :%s" % (error, url))
            else:
                success_len += 1
    finally:
        # Release the connection even if something unexpected escapes.
        client.close()
    return (items_len, success_len)
Beispiel #5
0
def save(pages=None, time_type='MDY_FORMAT'):
    """Insert one news document per page.

    Each page is inserted into the collection named by the
    ``db_table_newsInfo`` setting with ``_id`` set to ``str(page.id)``.
    Pages without an ``image_path`` have their ``Priority`` set to 1 before
    being stored (the page object itself is modified). A failure on one page
    is logged and does not prevent the remaining pages from being processed.

    Args:
        pages: Sized collection of page objects providing ``id``, ``Name``,
            ``digest``, ``Priority``, ``PublishTime``, ``time_type``,
            ``Subject``, ``URL``, ``image_path``, ``loc``, ``cc``,
            ``shardkey`` and ``SiteURL``. ``None`` (the default) is treated
            as empty.
        time_type: Format passed to ``utcTime.converter`` for pages whose own
            ``time_type`` is empty.

    Returns:
        Tuple ``(itemsLen, successLen)``: the number of pages received and
        the number that were inserted without raising.
    """
    pages = [] if pages is None else pages
    items_len = len(pages)
    success_len = 0

    logging.info('start save size :' + str(items_len))
    client = MongoClient(configureRead.getDBValue('mongodb', 'db_url'))
    try:
        db = client[configureRead.getDBValue('mongodb', 'db_name')]
        table_name = configureRead.getDBValue('mongodb', 'db_table_newsInfo')
        logging.info('save size :' + str(items_len))

        for page in pages:
            try:
                if not page.image_path:
                    page.Priority = 1
                # A format set on the page takes precedence over the
                # time_type argument.
                publish_format = page.time_type or time_type
                db[table_name].insert({
                    '_id': str(page.id),
                    'title': cgi.escape(page.Name),
                    'digest': cgi.escape(page.digest),
                    'priority': page.Priority,
                    'newsDate': utcTime.converter(page.PublishTime,
                                                  publish_format),
                    'fetchDate': utcTime.getCurrentTime(),
                    'category': page.Subject,
                    # NOTE(review): the original comment marks this field as
                    # the "unit key" (presumably a unique key) - confirm.
                    'url': cgi.escape(page.URL),
                    'imagePaths': page.image_path,
                    'loc': page.loc,
                    'cc': page.cc,
                    'shardkey': int(page.shardkey),
                    'siteURL': page.SiteURL,
                    'inBlackList': 0
                })
            except Exception as e:
                # The failure may be a missing attribute on the page itself,
                # so read the URL defensively to keep the handler from
                # raising a second error.
                url = getattr(page, 'URL', '') if page else ''
                logging.error("%s URL :%s" % (e, url))
            else:
                success_len += 1
    finally:
        # Release the connection even if something unexpected escapes.
        client.close()
    return (items_len, success_len)