Ejemplo n.º 1
0
    def __init__(self):
        """Initialise default settings, the log handle and the monitor state."""
        # Defaults: poll interval in seconds, log file name, backup folder.
        self.set_store_path = ''
        self.set_monitor_period_sec = 3
        self.set_log_file_path = 'monitor_files.log'
        self.logHandle = LogHandle(self.set_log_file_path)

        # Lists of watched / changed entries and the lock created for them.
        self.monitor_file_list = []
        self.change_file_list = []
        self.lock = threading.Lock()

        # Worker bookkeeping: live thread count and the shutdown flag.
        self.need_quit = False
        self.alive_thread_cnt = 0
Ejemplo n.º 2
0
    def do_init(self):
        """Prepare the database, logger, counters, task lists and image folder."""
        # Open the database file and register the table on the handler.
        database = self.db_handler = DBHandler()
        database.load('sex.db')
        database.add_table('sex')

        self.set_store_img_path = 'img'
        self.flag_quit = False

        # Expose the handler's log method directly as ``self.log``.
        logger = LogHandle('sex_srap.log')
        self.log_handler = logger
        self.log = logger.log

        self.set_thread_cnt = 6

        # Counters all start at zero.
        self.info_run_thread_cnt = self.info_succeed_cnt = self.info_failed_cnt = 0

        self.task_row_list = []
        self.task_update_row_list = []
        self.lock = threading.RLock()

        # Create the image folder on first use.
        image_dir = self.set_store_img_path
        if not os.path.exists(image_dir):
            os.mkdir(image_dir)
Ejemplo n.º 3
0
    pass


def arg_parser_init():
    """Create a MyArgParse instance with every command-line option registered.

    Returns:
        The configured MyArgParse object.
    """
    # (flag, argument spec, help text); the spec is presumably the list of
    # accepted argument counts -- confirm against MyArgParse.add_option.
    option_table = (
        ('-parse', [0, 1], 'parse img url'),
        ('-url', [0, 1], 'parse img url'),
        ('-download', [0, 1], 'download imgs'),
        ('-thread', [1], 'set thread count'),
        ('-d', [1], 'set img store folder'),
        ('-h', [0], 'print help'),
    )

    parser = MyArgParse()
    for flag, arg_spec, help_text in option_table:
        parser.add_option(flag, arg_spec, help_text)
    return parser


# Module-level log handle, constructed with the log file name 'sex_scrap.log'.
gLogHandler = LogHandle('sex_scrap.log')


def main():
    arg_handler = arg_parser_init()
    if not arg_handler.parse(sys.argv) or arg_handler.check_option('-h'):
        print arg_handler
        return

    front_page_node = PPFrontPageNode()
    start_url = 'http://www.sex.com/'
    if arg_handler.check_option('-d'):
        front_page_node.set_download_folder(
            arg_handler.get_option_args('-d')[0])

    if arg_handler.check_option('-thread'):
Ejemplo n.º 4
0
import os
import sys
import time
import subprocess

from common_lib import LogHandle

# Module-level log handle, constructed with the log file name 'copy.log'.
gLogHandle = LogHandle('copy.log')


def do_move(from_path, to_path):
    """Move ``from_path`` to ``to_path`` by running the external ``mv`` command.

    The call blocks until ``mv`` has exited.

    Args:
        from_path: path of the file to move.
        to_path: destination path handed to ``mv``.

    Returns:
        The exit status of ``mv`` (0 on success, non-zero on failure).
    """
    # subprocess.call waits for the child and returns its exit code, so no
    # hand-written poll/sleep loop (and its up-to-one-second delay) is needed.
    return subprocess.call(['mv', from_path, to_path])


def do_copy(from_dir, to_dir):
    items = os.listdir(from_dir)
    file_list = list()
    for item in items:
        path = os.path.join(from_dir, item)
        if os.path.isfile(path):
            file_list.append(path)
    gLogHandle.log('Total file cnt [%d]' % len(file_list))
    if 0 == len(file_list):
        gLogHandle.log('All done')
        return True
Ejemplo n.º 5
0
import time
import imghdr
import sys
import platform
import multiprocessing
import sqlite3
import datetime
import ConfigParser
import multiprocessing
import socket
import mutex
import Queue
from multiprocessing import Process, Pipe
from common_lib import MyArgParse
from common_lib import LogHandle
# Module-level log handle, constructed with the log file name 'hkpic.log'.
gstLogHandler = LogHandle('hkpic.log')

# Default file names; LoginMethod copies them into its config/cookie path
# attributes.
Config_Path = 'config.ini'
Cookie_Path = 'cookie'


class LoginMethod:
    def __init__(self, set_get_cookie=False):
        self.log = gstLogHandler.log
        self.login_url = 'http://hk-pic2.xyz/member.php?mod=logging&action=login&loginsubmit=yes&infloat=yes&lssubmit=yes&inajax=1'
        #http://hkpic-forum.xyz/member.php?mod=logging&action=login&loginsubmit=yes&infloat=yes&lssubmit=yes&inajax=1
        self.status_url = 'http://hk-pic2.xyz/forum-18-2.html'  #http://hkpic-forum.xyz/forum.php
        self.config_file_path = Config_Path
        self.cookie_file_path = Cookie_Path
        self.username = ''
        self.password = ''
Ejemplo n.º 6
0
class MonitorFile:
    """Poll a list of files and back each one up when its mtime changes.

    A worker thread started by ``start()`` checks every registered file once
    per polling period. A file whose modification time differs from the last
    recorded one is copied into the store folder under a timestamped name.
    Because the recorded time starts as ``None``, every existing file is
    backed up on the first pass.
    """

    def __init__(self):
        """Initialise default settings, the log handle and the monitor state."""
        self.set_monitor_period_sec = 3
        self.set_log_file_path = 'monitor_files.log'
        self.logHandle = LogHandle(self.set_log_file_path)
        self.set_store_path = ''

        # Guards monitor_file_list / change_file_list.
        self.lock = threading.Lock()
        self.monitor_file_list = list()
        self.change_file_list = list()

        self.alive_thread_cnt = 0
        self.need_quit = False

    def add_monitor_file(self, file_path):
        """Register ``file_path`` for monitoring."""
        dict_item = {'file_path': file_path[:], 'last_change_time': None}
        # ``with`` releases the lock even if the append raises.
        with self.lock:
            self.monitor_file_list.append(dict_item)

    def delete_monitor_file(self, file_path):
        """Stop monitoring ``file_path``; unknown paths are ignored."""
        with self.lock:
            # Slice assignment keeps the same list object in place.
            self.monitor_file_list[:] = [
                item for item in self.monitor_file_list
                if item['file_path'] != file_path
            ]

    def set_monitor_period(self, time_sec):
        """Set the delay, in seconds, between two polling passes."""
        self.set_monitor_period_sec = time_sec

    def set_store_folder(self, folder_path):
        """Select the backup folder, creating it when it does not exist."""
        if not os.path.exists(folder_path):
            os.mkdir(folder_path)
        self.set_store_path = folder_path[:]

    def get_new_file_path(self, old_file_path):
        """Return the backup path for ``old_file_path``.

        The result is the store folder joined with the original base name,
        followed by every digit of the current date and time.
        """
        file_name = os.path.basename(old_file_path)
        stamp = ''.join(
            c for c in str(datetime.datetime.now()) if '0' <= c <= '9')
        return os.path.join(self.set_store_path, file_name) + stamp

    def do_if_change(self, file_path):
        """Copy ``file_path`` to a new timestamped backup file.

        Nothing is copied (a warning is logged) when the backup name already
        exists.

        Raises:
            IOError / OSError: when the source cannot be read or the backup
                cannot be written.
        """
        max_size_once_read = 1 * 1024 * 1024
        new_file_name = self.get_new_file_path(file_path)
        if os.path.exists(new_file_name):
            self.logHandle.log('Warning, back file already exist [%s]' %
                               new_file_name)
            return
        # Binary mode copies the bytes untouched (no newline translation or
        # text decoding). The source is opened first so that an unreadable
        # source does not leave an empty backup file behind.
        with open(file_path, 'rb') as r_fd:
            with open(new_file_name, 'wb') as w_fd:
                while True:
                    content = r_fd.read(max_size_once_read)
                    if not content:
                        break
                    w_fd.write(content)
        self.logHandle.log('Copy [%s] to [%s]' % (file_path, new_file_name))

    def start(self):
        """Start the polling worker thread."""
        pro = threading.Thread(target=self.monitor_thread)
        pro.start()

    def stop(self):
        """Ask the worker to quit and wait until no worker is alive."""
        self.need_quit = True
        while self.alive_thread_cnt:
            self.logHandle.log('waiting for thread quit...')
            time.sleep(0.5)
        self.logHandle.log('Quit now')

    def _collect_changed_files(self):
        """Queue every monitored file whose mtime changed (lock must be held)."""
        for file_item in self.monitor_file_list:
            file_path = file_item['file_path']
            self.logHandle.log('Check files [%s]' % file_path)
            try:
                cur_time_info = os.stat(file_path).st_mtime
            except OSError:
                # Missing or inaccessible file: skip it for this pass.
                continue
            if cur_time_info != file_item['last_change_time']:
                self.change_file_list.append(file_item)
                file_item['last_change_time'] = cur_time_info
                self.logHandle.log('Add File [%s] to change list' %
                                   file_path)

    def _backup_changed_files(self):
        """Back up every queued file, then clear the queue (lock must be held)."""
        for file_item in self.change_file_list:
            file_path = file_item['file_path']
            self.logHandle.log('File [%s] will be processed' % file_path)
            try:
                self.do_if_change(file_path)
            except (IOError, OSError) as err:
                # One failed copy must not stop monitoring of the other files.
                self.logHandle.log('Failed to back up [%s]: %s' %
                                   (file_path, err))
        self.change_file_list = list()

    def monitor_thread(self):
        """Worker loop: poll, back up changes, sleep, until asked to quit."""
        self.alive_thread_cnt += 1
        try:
            while True:
                with self.lock:
                    self._collect_changed_files()
                    self._backup_changed_files()
                if self.need_quit:
                    break
                time.sleep(self.set_monitor_period_sec)
                if self.need_quit:
                    break
        finally:
            # Always decrement so stop() cannot wait forever on a dead worker.
            self.alive_thread_cnt -= 1
Ejemplo n.º 7
0
import os
import sys
import json
import requests
import re
from common_lib import LogHandle
# Module-level log handle, constructed with the log file name 'bing.log';
# BingPic binds its ``log`` attribute to this handle's ``log`` method.
gstLogHandler = LogHandle('bing.log')
# Presumably the temporary file for the fetched image-list JSON -- its use is
# not visible in this part of the file; TODO confirm.
gImgJsonTmpFile = 'img_list_tmp.json'


class BingPic:
    def __init__(self):
        """Set default attributes, open an HTTP session and run do_init()."""
        self.log = gstLogHandler.log

        # Paging / parsing state.
        self.m_page_idx = 0
        self.m_last_json_content = ''

        # Settings: target folder and the image-archive JSON endpoint.
        self.m_set_store_folder = ''
        self.m_set_imgs_json_url = 'https://cn.bing.com/HPImageArchive.aspx?format=js&idx=6&n=8'

        self.m_session_handler = requests.session()

        # Applies the default folder and request headers.
        self.do_init()

    def do_init(self):
        """Select the 'Imgs' store folder and install the session's request headers."""
        self.set_store_folder('Imgs')

        browser_headers = dict()
        browser_headers['user-agent'] = 'Mozilla/5.0 (Windows NT 6.1; rv:52.0) Gecko/20100101 Firefox/52.0'
        browser_headers['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
        browser_headers['Accept-Language'] = 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3'

        # Replaces the session's header mapping with this dict.
        self.m_session_handler.headers = browser_headers

    def get_url_to_parse(self, last_json_content=None):
Ejemplo n.º 8
0
import requests
import os
import sys
import json
from lxml import etree
import re
import datetime
import threading
import hashlib
import time
from common_lib import MyArgParse, LogHandle, ThreadHandler
from sqlite_util import DBRowHuaBan, DBHandler, DBRow, DBItem

# Module-level log handle, constructed with the log file name 'geo.log'.
gstLogHandle = LogHandle('geo.log')


class DBRowGeo(DBRow):
    def do_init(self):
        """Append this row type's column definitions to ``self.item_list``."""
        # (column name, column type, extra DBItem keyword arguments), in order.
        column_specs = (
            ('pageUrl', 'CHAR', {}),
            ('title', 'CHAR', {}),
            ('profileUrl', 'CHAR', {}),
            ('altText', 'CHAR', {}),
            ('url', 'CHAR', {}),
            ('url_hash', 'CHAR', {'is_primary': True}),
            ('is_done', 'INT', {}),
        )
        for column_name, column_type, extra in column_specs:
            self.item_list.append(DBItem(column_name, column_type, **extra))

    def generate_select_cmd_str(self, table_name):
        ret_str = ' select '
        for idx, item in enumerate(self.item_list):
            ret_str += table_name + '.' + item.name