Beispiel #1
0
# coding=utf-8
import os
import MySQLdb
from hashlib import md5
from BeautifulSoup import BeautifulSoup
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from services.db import MySQLdbWrapper

# Module-level database wrapper instance, created once at import time.
_db = MySQLdbWrapper()


def init_file(file):
    dic = {
        'id': file[0],
        'name': file[1],
        'icon_link': file[2],
        'icon_path': file[3],
        'source': file[4],
        'source_link': file[5],
        'rating': file[6],
        'version': file[7],
        'developer': file[8],
        'sdk_support': file[9],
        'category': file[10],
        #           'screen_support':file[11],
        'screen_support': None,
        'apk_size': file[12],
        'language': file[13],
        'publish_date': file[14],
        'downloads': file[15],
Beispiel #2
0
# Collect the seed URLs. Nothing is validated here: every URL is _base_url
# formatted with a fixed first argument and a 1-based counter. The 1000 URLs
# for '2' come first, followed by the 2000 URLs for '1'.
_seed_url_list = []
for first_arg, total in (('2', 1000), ('1', 2000)):
    for number in range(1, total + 1):
        _seed_url_list.append(_base_url % (first_arg, str(number)))

import sys
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from services.db import MySQLdbWrapper
# A single cursor is reused for every statement issued below.
cursor = MySQLdbWrapper().cursor()

# Parameterized statements: the values are handed to the driver separately
# from the SQL text, so a URL containing a quote cannot break the statement.
select_sql = "select priority from new_link where source='zhushou.360.cn' and link=%s"
insert_sql = ("insert into new_link (id, source, link, last_crawl, priority) "
              "values (%s, 'zhushou.360.cn', %s, 1, 10);")

insert_count = 0
update_count = 0
for seed_url in _seed_url_list:
    # The stored priority may have been changed to failed/missing, so it is
    # fetched rather than just testing for existence.
    cursor.execute(select_sql, (seed_url,))
    results = cursor.fetchall()
    if not results:
        # Unknown link: insert it, using the upper-cased MD5 hex digest of
        # the URL as the row id.
        cursor.execute(insert_sql,
                       (md5(seed_url).hexdigest().upper(), seed_url))
        # NOTE(review): _conn is not defined in the visible part of this
        # script, and the wrapper that produced `cursor` is not kept --
        # confirm that _conn is the connection behind this cursor.
        _conn.commit()
        insert_count += 1
    # if priority <> 10, the link has been reported as failed or missing etc., so update it to normal
Beispiel #3
0
import MySQLdb
import smtplib
import time
import datetime
import os
import pickle
from platform import node

import sys
reload(sys)
getattr(sys, 'setdefaultencoding')('utf-8')

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from services.db import MySQLdbWrapper

# Database wrapper and the cursor used for the report query.
_db = MySQLdbWrapper()
cursor = _db.cursor()

results = []
# Epoch seconds 24 hours ago. The query below does not use this value; an
# earlier version of the query filtered on it.
last_date = int(time.time()) - 60 * 60 * 24

# Reporting window: midnight yesterday (inclusive) to midnight today
# (exclusive), both as naive local datetimes.
date_from = (datetime.datetime.now() - datetime.timedelta(days=1)).replace(
    hour=0, minute=0, second=0, microsecond=0)
date_to = date_from + datetime.timedelta(days=1)

# Per source, count the distinct package names of final_app rows created
# inside the window. The range is half-open on purpose: BETWEEN is inclusive
# on both ends, so a row stamped exactly at midnight would fall into two
# adjacent one-day windows.
sql = ('SELECT a.source,COUNT(DISTINCT b.package_name) AS count_p '
       'FROM app a JOIN final_app b ON a.source_link = b.source_link '
       'where b.created_at >= %s and b.created_at < %s GROUP BY a.source;')
cursor.execute(sql, (date_from, date_to))
results_all = cursor.fetchall()
Beispiel #4
0
def get_db():
    """Return the MySQLdbWrapper stored on ``_db.db``, creating it on first use.

    The wrapper is built only when ``_db`` has no ``db`` attribute yet; later
    calls return the same stored object.
    """
    if not hasattr(_db, 'db'):
        _db.db = MySQLdbWrapper()
    return _db.db
Beispiel #5
0
import smtplib
import time
import datetime
import os
import pickle
from platform import node


import sys
reload(sys)
getattr(sys, 'setdefaultencoding')('utf-8')

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from services.db import MySQLdbWrapper

# Database wrapper and the cursor used for the report query.
_db = MySQLdbWrapper()
cursor = _db.cursor()


results = []
# Epoch seconds 24 hours ago. The query below does not use this value; an
# earlier version of the query filtered on it.
last_date = int(time.time()) - 60 * 60 * 24

# Reporting window: midnight yesterday (inclusive) to midnight today
# (exclusive), both as naive local datetimes.
date_from = (datetime.datetime.now() - datetime.timedelta(days=1)).replace(
    hour=0, minute=0, second=0, microsecond=0)
date_to = date_from + datetime.timedelta(days=1)

# Per source, count the distinct package names of final_app rows created
# inside the window. The range is half-open on purpose: BETWEEN is inclusive
# on both ends, so a row stamped exactly at midnight would fall into two
# adjacent one-day windows.
sql = ('SELECT a.source,COUNT(DISTINCT b.package_name) AS count_p '
       'FROM app a JOIN final_app b ON a.source_link = b.source_link '
       'where b.created_at >= %s and b.created_at < %s GROUP BY a.source;')
cursor.execute(sql, (date_from, date_to))
results_all = cursor.fetchall()
Beispiel #6
0
# Collect the seed URLs. Nothing is validated here: every URL is _base_url
# formatted with a fixed first argument and a 1-based counter. The 1000 URLs
# for '2' come first, followed by the 2000 URLs for '1'.
_seed_url_list = []
for first_arg, total in (('2', 1000), ('1', 2000)):
    for number in range(1, total + 1):
        _seed_url_list.append(_base_url % (first_arg, str(number)))

import sys
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from services.db import MySQLdbWrapper
# A single cursor is reused for every statement issued below.
cursor = MySQLdbWrapper().cursor()

# Parameterized statements: the values are handed to the driver separately
# from the SQL text, so a URL containing a quote cannot break the statement.
select_sql = "select priority from new_link where source='zhushou.360.cn' and link=%s"
insert_sql = ("insert into new_link (id, source, link, last_crawl, priority) "
              "values (%s, 'zhushou.360.cn', %s, 1, 10);")

insert_count = 0
update_count = 0
for seed_url in _seed_url_list:
    # The stored priority may have been changed to failed/missing, so it is
    # fetched rather than just testing for existence.
    cursor.execute(select_sql, (seed_url,))
    results = cursor.fetchall()
    if not results:
        # Unknown link: insert it, using the upper-cased MD5 hex digest of
        # the URL as the row id.
        cursor.execute(insert_sql,
                       (md5(seed_url).hexdigest().upper(), seed_url))
        # NOTE(review): _conn is not defined in the visible part of this
        # script, and the wrapper that produced `cursor` is not kept --
        # confirm that _conn is the connection behind this cursor.
        _conn.commit()
        insert_count += 1