Esempio n. 1
0
from resource.renrenaccount import RenrenAccountErrorCode
from resource.renrenaccountpool import createProdRenrenAccountPool
from resource.renrenaccountpool import RenrenAccountPool
from resource.proxypool import createProdProxyPool
from resource.proxy import Proxy
from crawl.crawler import Crawler
from crawl.crawler import CrawlerException
from crawl.crawler import CrawlerErrorCode
from crawl.renrenagent import RenrenAgent
from crawl.startnodecrawler import StartNodeCrawler

import time
import threading
import signal

# Command-line flags for the crawl run. Each call registers one INT flag;
# the meaning of every flag is given by its own description string.
# NOTE(review): 'avaliable' in the first description is a typo for
# 'available'; it is left untouched here because it is runtime help text.
flag.defineFlag(
    name='waiting_time',
    type_=FlagType.INT,
    default=0,
    description='Wait before crawling to let account become avaliable.(In mins)')

flag.defineFlag(
    name='accounts_limit',
    type_=FlagType.INT,
    default=10,
    description='Account limit in a single thread.')
flag.defineFlag(
    name='thread_number',
    type_=FlagType.INT,
    default=8,
    description='Crawling thread number in a single round.')
flag.defineFlag(
    name='round_number',
    type_=FlagType.INT,
    default=30,
    description='Crawling round number.')

# Module-level state, both initially "unset".
# NOTE(review): their readers/writers are outside this excerpt.
currentCrawler = None
stopSignal = False

def detectSignal(a, b):
    """Report the signal on stdout and ask the crawler to stop.

    Both positional arguments are accepted and ignored.
    NOTE(review): presumably installed as the SIGINT handler (so ``a`` would
    be the signal number and ``b`` the current frame) -- the registration is
    not visible in this excerpt, confirm against the caller.
    """
    # Parenthesised single-argument form: prints the same text under the
    # Python 2 print statement and also parses as a function call.
    print("INT Signal detect")
    Crawler.setStopSignal()
Esempio n. 2
0
# -*- coding: utf-8 -*-

from jx import log
from jx import flag
from utils.util import isHanChar
from utils import confidential as CFD
from utils import globalconfig as GC
from data.database import Profile
from data.database import Gender
from data.readonlydatastore import createProdReadOnlyDataStore
from data.readonlydatastore import ReadOnlyDataStore
from analyse.result import Result
from entities.name_helper import NameHelper
    

# Boolean flag, on by default; see the description string for its purpose.
flag.defineFlag(
    'use_result_filter',
    flag.FlagType.BOOLEAN,
    True,
    'Whether there is need to filter some unnecessary content in result.')

def valueCmp(x, y):
    """Tell whether x's count is strictly smaller than y's.

    Both arguments are indexable; element 1 of each must expose a ``count``
    attribute that supports ``<``.

    Returns the result of ``x[1].count < y[1].count``.

    NOTE(review): the result is never negative. If this is handed to a
    Python 2 ``cmp=`` parameter, confirm that is intended -- cmp callbacks
    are expected to return a negative, zero or positive number.
    """
    countOfX = x[1].count
    countOfY = y[1].count
    return countOfX < countOfY

class Analyser:
    """Analyse the data in the data store and build the index of the result.

    NOTE(review): only the beginning of this class is visible in this
    excerpt; the body of ``analyse`` is cut off after its docstring.
    """

    dataStore = None # The data source; rebound per instance in __init__.
    # Result object created once, when the class body is executed.
    # NOTE(review): as a class attribute it is shared by every Analyser
    # unless an instance rebinds it -- confirm that sharing is intended.
    result = Result()

    def __init__(self):
        """Bind the store returned by createProdReadOnlyDataStore()."""
        self.dataStore = createProdReadOnlyDataStore()

    def analyse(self):
        """Analyse the data."""
Esempio n. 3
0
#!/usr/bin/python
# -*- coding: utf-8 -*-

from jx import flag
from jx import log
from entities.name_pb2 import RawNameItemInfo, GlobalNameInfo
from entities.name_helper import NameHelper

from struct import *

# Minimum-count thresholds, registered as INT flags in the order listed.
# All five flags share the same help text; only the name and default differ.
for _flagName, _minCount in (
        ('xing_char_map_min_count', 2),
        ('xing_map_min_count', 2),
        ('ming_char_map_min_count', 2),
        ('ming_map_min_count', 5),
        ('xing_ming_map_min_count', 5)):
    flag.defineFlag(
        _flagName, flag.FlagType.INT, _minCount,
        'item info with count smaller than this will be filtered out.')
# Keep the loop temporaries out of the module namespace.
del _flagName, _minCount


class Result:
    """Analysis result."""
    globalInfo = None

    xingCharMap = None # Family name character map object.
    xingCharSortedArray = None # A array contains sorted xing Char.
                               # Set by @caculate.

    xingMap = None # Family name string map object.