Esempio n. 1
0
def run(q, channel, _type):

    if _type in ['raw', 'check']:
        handler = ETLRunnerFromRaw(channel)
    else:
        handler = ETLRunner(channel)
    sucess_cnt = 0
    fail_cnt = 0

    fail_save_file_name = 'result/%s_fail_ids_%d.txt' % (channel, os.getpid())
    fail_save_file = FileSave(fail_save_file_name)

    result_file_name = 'result/%s_statistics_%d.txt' % (channel, os.getpid())
    result_file = FileSave(result_file_name)

    while 1:
        job = get_job(q, _type)
        if job is None:
            break
        try:
            handler.run(job)
            sucess_cnt += 1
            print >> result_file.fd, "%s" % (job['indexUrl'])
            if sucess_cnt % 1000 == 0:
                print "process %d, time: %s, success copied: %d, " \
                      "fail copied: %d, fail_save_file: %s, result_file: %s" % (os.getpid(),
                                                                                time.ctime(),
                                                                                sucess_cnt,
                                                                                fail_cnt,
                                                                                fail_save_file_name,
                                                                                result_file_name)
        except Exception as e:
            traceback.print_exc()
            fail_cnt += 1
            fail_save_file.append_end_with(job['indexUrl'])
Esempio n. 2
0
class FixCvField(BaseTask):
    def __init__(self, channel, field, field_pattern):
        BaseTask.__init__(self, "fix_cv_field")
        self.channel = channel
        self.field = field
        self.field_pattern = field_pattern

        self.bugfix = BugFixDispatcher(channel, self._queue)
        self.dispatcher = lambda q: self.bugfix.dispatcher()

        self.etl_runner = ETLRunnerFromRaw(channel)


    def run_job(self, job):

        try:
             if not isinstance(job, dict):
                return

             split_field = self.field.split('.')
             if len(split_field) == 2:
                 field_1 = getattr(job, split_field[0])
                 if isinstance(field_1, list):
                     for item in field_1:
                         field_2 = getattr(item, split_field[1])
                         assert isinstance(field_2, basestring)
                         if re.search(self.field_pattern, field_2):
                             # 匹配模式,重新量化
                             print "cvId: ", job.get('cvId'), " || ",
                             print self.field_pattern, " : ", field_2, " || ",
                             self.etl_runner.run(job)

        except Exception as e:
            traceback.print_exc()
Esempio n. 3
0
    def __init__(self, channel, field, field_pattern):
        """Initialise the "fix_cv_field" task and its collaborators.

        :param channel: stored on the instance and passed to
            ``BugFixDispatcher`` and ``ETLRunnerFromRaw``.
        :param field: stored as ``self.field``.
        :param field_pattern: stored as ``self.field_pattern``.
        """
        BaseTask.__init__(self, "fix_cv_field")
        self.channel, self.field, self.field_pattern = (
            channel, field, field_pattern)

        # self._queue is not assigned here; presumably BaseTask.__init__
        # sets it up -- TODO confirm.
        self.bugfix = BugFixDispatcher(channel, self._queue)

        def _dispatch(q):
            # The argument is ignored; dispatching is delegated to the
            # BugFixDispatcher currently stored on the instance.
            return self.bugfix.dispatcher()

        self.dispatcher = _dispatch

        self.etl_runner = ETLRunnerFromRaw(channel)