Example #1
0
def do_import_rtq(db, password='', newfileonly=False):
    loader = FileLoader()
    db = DB(password=password, db=db)
    db.create_table_deadlink()
    # print 'try loading', sys.argv[1]
    # lines = loader.load(
    # sys.argv[1])
    newdate = str(db.get_last_day('deadlink'))

    if newdate == 'None':
        newfileonly = False
    for file in utils.dir_listfile('result'):
        rawdatestr = utils.stripDateStr(file).group(1)
        datestr = utils.parseDateString(rawdatestr)
        if newfileonly:
            if datestr > newdate:
                lines = loader.load(file, 7)
                print 'staring insert lines from', file, 'datetime is', datestr
                db.inserts_deadlink(lines, date=datestr)
                print 'insert completed'
            else:
                print datestr, 'skiped'
        else:
            lines = loader.load(file, 7)
            print 'staring insert lines from', file, 'datetime is', datestr
            db.inserts_deadlink(lines, date=datestr)
            print 'insert completed'
Example #2
0
def do_import_rtq(db, password='', newfileonly=False):
    loader = FileLoader()
    db = DB(password=password, db=db)
    db.create_table_deadlink()
    # print 'try loading', sys.argv[1]
    # lines = loader.load(
    # sys.argv[1])
    newdate = str(db.get_last_day('deadlink'))

    if newdate == 'None':
        newfileonly = False
    for file in utils.dir_listfile('result'):
        rawdatestr = utils.stripDateStr(file).group(1)
        datestr = utils.parseDateString(rawdatestr)
        if newfileonly:
            if datestr > newdate:
                lines = loader.load(file, 7)
                print 'staring insert lines from', file, 'datetime is', datestr
                db.inserts_deadlink(lines, date=datestr)
                print 'insert completed'
            else:
                print datestr, 'skiped'
        else:
            lines = loader.load(file, 7)
            print 'staring insert lines from', file, 'datetime is', datestr
            db.inserts_deadlink(lines, date=datestr)
            print 'insert completed'
Example #3
0
def do_import_rcu(db, password='', newfileonly=False):
    loader = FileLoader()
    db = DB(password=password, db=db)
    db.create_table_deadlink_classify()
    cates = ['aladdin', 'h5', 'lightaap', 'normal', 'siteapp', 'tc']
    newdate = str(db.get_last_day('deadlink'))

    if newdate == 'None':
        newfileonly = False

    def httpcodeNot200(line):
        return line[2] != '200'

    for category in cates:
        for file in utils.dir_listfile(
                'result',
                subdirprefix='result_spider_random_classfiy_url',
                fileSubPrefix='result_spider_10000_' + category):
            datestr = utils.getDateFromStr(file)
            if newfileonly:
                if datestr > newfileonly:
                    lines = loader.load(file, 5)
                    try:
                        filteredlines = [
                            i
                            for i in itertools.ifilter(httpcodeNot200, lines)
                        ]
                    except TypeError, e:
                        print >> sys.stderr, e.args
                        continue
                    print 'deads', len(
                        filteredlines
                    ), 'staring insert lines from', file, 'datetime is', datestr
                    db.inserts_deadlink_classify(filteredlines,
                                                 cls=category,
                                                 date=datestr)
                    print 'insert completed'
                else:
                    print datestr, 'skiped'
            else:
                lines = loader.load(file, 5)
                try:
                    filteredlines = [
                        i for i in itertools.ifilter(httpcodeNot200, lines)
                    ]
                except TypeError, e:
                    print >> sys.stderr, e.args
                    continue
                print 'deads', len(
                    filteredlines
                ), 'staring insert lines from', file, 'datetime is', datestr
                db.inserts_deadlink_classify(filteredlines,
                                             cls=category,
                                             date=datestr)
                print 'insert completed'
Example #4
0
def do_import_rcu(db, password='', newfileonly=False):
    loader = FileLoader()
    db = DB(password=password, db=db)
    db.create_table_deadlink_classify()
    cates = ['aladdin', 'h5', 'lightaap', 'normal', 'siteapp', 'tc']
    newdate = str(db.get_last_day('deadlink'))

    if newdate == 'None':
        newfileonly = False

    def httpcodeNot200(line):
        return line[2] != '200'
    for category in cates:
        for file in utils.dir_listfile('result',
                                       subdirprefix='result_spider_random_classfiy_url',
                                       fileSubPrefix='result_spider_10000_' + category):
            datestr = utils.getDateFromStr(file)
            if newfileonly:
                if datestr > newfileonly:
                    lines = loader.load(file, 5)
                    try:
                        filteredlines = [i for i in itertools.ifilter(httpcodeNot200, lines)]
                    except TypeError, e:
                        print >> sys.stderr, e.args
                        continue
                    print 'deads', len(filteredlines), 'staring insert lines from', file, 'datetime is', datestr
                    db.inserts_deadlink_classify(filteredlines, cls=category, date=datestr)
                    print 'insert completed'
                else:
                    print datestr, 'skiped'
            else:
                lines = loader.load(file, 5)
                try:
                    filteredlines = [i for i in itertools.ifilter(httpcodeNot200, lines)]
                except TypeError, e:
                    print >> sys.stderr, e.args
                    continue
                print 'deads', len(filteredlines), 'staring insert lines from', file, 'datetime is', datestr
                db.inserts_deadlink_classify(filteredlines, cls=category, date=datestr)
                print 'insert completed'
Example #5
0
def do_test(db, password=''):
    db = DB(password=password, db=db)
    ret = db.get_last_day('classify')
    print 'last day is', ret
Example #6
0
def do_test(db, password=''):
    db = DB(password=password, db=db)
    ret = db.get_last_day('classify')
    print 'last day is', ret