Ejemplo n.º 1
0
def modiPath():
    host_collector_address = base_address + '\\CompleteModel\\HuaJiao_30_host_collector_modipath.pickle'
    whole_result_address = base_address + '\\Path_Modi\\result_huajiao_wvs_1113_modipath_modi'
    path_para_dic_address = base_address + '\\Path_Modi\\modi_path_dict.pickle'
    path_collector = []

    with open(host_collector_address, 'rb') as read_host_collector:
        host_collector = pickle.load(read_host_collector)
    anomaly_detector = AnomalyDetector()
    anomaly_writer = AnomalyWriter(whole_result_address)

    try:
        # save the begin time
        begin_time = datetime.datetime.now()
        detect_time_flag = True
        global study_ready_time
        global detect_ready_time

        with open(url_address, 'rb') as infile:
            record_num = 0
            for line in infile:
                line = line.strip(' \n')
                record = line.split('\t')
                if len(record) == 13 and record[_host] != '':
                    a_record = UrlRecord(record)
                    current_model = host_collector.get_host_model(
                        a_record.get_host())
                    pattern_flag = current_model.getDetectFlag()
                    if pattern_flag == 'Study ready':
                        if detect_time_flag:
                            detect_time_flag = False
                            study_ready_time = datetime.datetime.now()
                            study_interval = study_ready_time - begin_time
                            print 'Study consuming: %s' % study_interval
                        anomaly_status = anomaly_detector.detectpath(
                            a_record, current_model, 'new')
                        path_collector.append(anomaly_status['path_para'])
                        if anomaly_status['Result'] is True:
                            anomaly_writer.writePathResult(
                                a_record, anomaly_status)
                record_num += 1
                if record_num % 1000 == 0:
                    print 'Record completed: ', record_num,
                    print '\tUrl amount: %s\tDifferent url amount: %s\tDifferent sip: %s\t' % (
                        current_model.getUrlAmount(),
                        current_model.getDifUrlAmount(),
                        current_model.getSipAmount())
    except IndexError:
        print 'Lancer says:\n' \
           '\tIt has error through the system.'
        raise

    with open(path_para_dic_address, 'wb') as save:
        pickle.dump(path_collector, save)

    end_time = datetime.datetime.now()
    detect_ready_time = end_time - study_ready_time
    print "Detection consuming: %s" % detect_ready_time
    print 'Whole system test finished.'
Ejemplo n.º 2
0
def modiPath():
    host_collector_address = base_address + '\\CompleteModel\\HuaJiao_30_host_collector_modipath.pickle'
    whole_result_address = base_address + '\\Path_Modi\\result_huajiao_wvs_1113_modipath_modi'
    path_para_dic_address = base_address + '\\Path_Modi\\modi_path_dict.pickle'
    path_collector = []

    with open(host_collector_address, 'rb') as read_host_collector:
        host_collector = pickle.load(read_host_collector)
    anomaly_detector = AnomalyDetector()
    anomaly_writer = AnomalyWriter(whole_result_address)

    try:
        # save the begin time
        begin_time = datetime.datetime.now()
        detect_time_flag = True
        global study_ready_time
        global detect_ready_time

        with open(url_address, 'rb') as infile:
            record_num = 0
            for line in infile:
                line = line.strip(' \n')
                record = line.split('\t')
                if len(record) == 13 and record[_host] != '':
                    a_record = UrlRecord(record)
                    current_model = host_collector.get_host_model(a_record.get_host())
                    pattern_flag = current_model.getDetectFlag()
                    if pattern_flag == 'Study ready':
                        if detect_time_flag:
                            detect_time_flag = False
                            study_ready_time = datetime.datetime.now()
                            study_interval = study_ready_time - begin_time
                            print 'Study consuming: %s' % study_interval
                        anomaly_status = anomaly_detector.detectpath(a_record, current_model, 'new')
                        path_collector.append(anomaly_status['path_para'])
                        if anomaly_status['Result'] is True:
                                anomaly_writer.writePathResult(a_record, anomaly_status)
                record_num += 1
                if record_num % 1000 == 0:
                    print 'Record completed: ', record_num,
                    print '\tUrl amount: %s\tDifferent url amount: %s\tDifferent sip: %s\t' % (current_model.getUrlAmount(), current_model.getDifUrlAmount(), current_model.getSipAmount())
    except IndexError:
        print 'Lancer says:\n' \
           '\tIt has error through the system.'
        raise

    with open(path_para_dic_address, 'wb') as save:
        pickle.dump(path_collector, save)

    end_time = datetime.datetime.now()
    detect_ready_time = end_time - study_ready_time
    print "Detection consuming: %s" % detect_ready_time
    print 'Whole system test finished.'
Ejemplo n.º 3
0
def main():
    with open(host_collector_address, 'rb') as read_host_collector:
        host_collector = pickle.load(read_host_collector)
    anomaly_detector = AnomalyDetector()
    anomaly_collector = AnomalyCollector()
    result_controller = ResultController()
    anomaly_writer = AnomalyWriter(whole_result_address)

    try:
        # save the begin time
        begin_time = datetime.datetime.now()
        detect_time_flag = True
        global study_ready_time
        global detect_ready_time

        with open(url_address, 'rb') as infile:
            record_num = 0
            for line in infile:
                line = line.strip(' \n')
                record = line.split('\t')
                if len(record) == 13 and record[_host] != '':
                    a_record = UrlRecord(record)
                    current_model = host_collector.get_host_model(a_record.get_host())
                    pattern_flag = current_model.getDetectFlag()
                    if pattern_flag == 'Study ready':
                        if detect_time_flag:
                            detect_time_flag = False
                            study_ready_time = datetime.datetime.now()
                            study_interval = study_ready_time - begin_time
                            print 'Study consuming: %s' % study_interval
                        anomaly_status = anomaly_detector.detect(a_record, current_model)
                        if anomaly_status['Result'] is True:
                            valid_flag = result_controller.isValid(a_record, anomaly_collector)
                            if valid_flag is True:
                                anomaly_writer.writeCompleteResult(a_record, anomaly_status)
                                # anomaly_writer.writeTimeAttribute(a_record)
                            else:
                                current_model.reStudy()
                record_num += 1
                if record_num % 1000 == 0:
                    print 'Record completed: ', record_num,
                    print '\tUrl amount: %s\tDifferent url amount: %s\tDifferent sip: %s\t' % (current_model.getUrlAmount(), current_model.getDifUrlAmount(), current_model.getSipAmount())
    except IndexError:
        print 'Lancer says:\n' \
           '\tIt has error through the system.'
        raise

    end_time = datetime.datetime.now()
    detect_ready_time = end_time - study_ready_time
    print "Detection consuming: %s" % detect_ready_time
    print 'Whole system test finished.'
Ejemplo n.º 4
0
def main():
    cookie = get_cookie()
    url_record = UrlRecord()

    current_url_id = get_current_url_id()
    while current_url_id < 1000:
        person_url = url_record.get(current_url_id)
        if person_url is None:
            break

        url = person_url[0]
        name = person_url[1]

        file_name = build_file_name(current_url_id, url, name)
        fp = open(file_name, 'w')

        # 将用户的微博写入文件中
        people = PeoplePage(url, cookie)
        total_num = people.get_blog_page_total_num()
        for i in range(1, total_num + 1):
            blog_list = people.get_blog_list(i)
            for blog in blog_list:
                fp.write(blog[0])
                fp.write('\t')
                fp.write(blog[1])
                fp.write('\t')
                fp.write(blog[2])
                fp.write('\t')
                fp.write('\n')
            print("%s Complete %d" % (name, i * 100 / total_num))
        fp.close()

        # 将当前用户的关注用户写入数据库
        care_page_url = people.get_care_people_page_url()
        print care_page_url
        if care_page_url == '':
            break

        care_people_page = CarePeoplePage(care_page_url, cookie)
        total_num = care_people_page.get_page_total_num()

        for i in range(1, total_num + 1):
            people_list = care_people_page.get_people_list(i)
            for people in people_list:
                url_record.add(people[1].decode('utf-8'), people[0].decode('utf-8'))

        current_url_id += 1
        set_current_url_id(current_url_id)
Ejemplo n.º 5
0
def main():
    host_collector = HostCollector()
    anomaly_detector = AnomalyDetector()
    anomaly_collector = AnomalyCollector()
    result_controller = ResultController()
    anomaly_writer = AnomalyWriter(whole_result_address)

    try:
        # save the begin time
        begin_time = datetime.datetime.now()
        detect_time_flag = True
        global study_ready_time
        global detect_ready_time

        with open(url_address, 'rb') as infile:
            record_num = 0
            study_record_num = 0
            for line in infile:
                record = line.strip(' \n').split('\t')
                if len(record) == 13 and record[_host] != '':
                    a_url = UrlRecord(record)
                    current_model = host_collector.getHostModel(
                        a_url.get_host())
                    pattern_flag = current_model.getDetectFlag()
                    # Judge Host-model's pattern('Study ready' or 'Study...')
                    if pattern_flag == 'Study ready':
                        if detect_time_flag:
                            detect_time_flag = False
                            study_ready_time = datetime.datetime.now()
                            study_interval = study_ready_time - begin_time
                            study_record_num = record_num
                            print 'Study consuming: %s' % study_interval
                            print 'Study Record: %s' % study_record_num
                            print '\tUrl amount: %s\tDifferent url amount: %s\tDifferent sip: %s\t' % \
                                (current_model.getUrlAmount(), current_model.getDifUrlAmount(),
                                 current_model.getSipAmount())
                        # Detect whether the record is anomaly
                        anomaly_status = anomaly_detector.detect(
                            a_url, current_model)
                        # If the record is detected to be anomaly
                        if anomaly_status['Result'] is True:
                            valid_flag = result_controller.isValid(
                                a_url, anomaly_collector)
                            if valid_flag is True:
                                anomaly_writer.writeResult(
                                    a_url, anomaly_status)
                            else:
                                current_model.reStudy()
                    elif pattern_flag == 'Study...':
                        current_model.add_record(a_url)
                    else:
                        raise ValueError(
                            'Lancer says: pattern selection error.')
                record_num += 1
                if record_num % 10000 == 0:
                    print 'Record completed: ', record_num,
            else:
                detect_record_num = record_num - study_record_num
                end_time = datetime.datetime.now()
                detect_ready_time = end_time - study_ready_time
                print 'Detect Record: %s' % detect_record_num
                print "Detection consuming: %s" % detect_ready_time
                print 'Whole system test finished.'

        # Store the completed Host Model.
        with open(host_collector_address, 'wb') as save_host_collector:
            pickle.dump(host_collector, save_host_collector)

    except IndexError:
        print 'Lancer says:\n\tIt has error through the system.'
        raise
Ejemplo n.º 6
0
def main():
    host_collector = HostCollector()
    anomaly_detector = AnomalyDetector()
    anomaly_collector = AnomalyCollector()
    result_controller = ResultController()
    anomaly_writer = AnomalyWriter(whole_result_address)

    try:
        # save the begin time
        begin_time = datetime.datetime.now()
        detect_time_flag = True
        global study_ready_time
        global detect_ready_time

        with open(url_address, 'rb') as infile:
            record_num = 0
            study_record_num = 0
            for line in infile:
                record = line.strip(' \n').split('\t')
                if len(record) == 13 and record[_host] != '':
                    a_url = UrlRecord(record)
                    current_model = host_collector.getHostModel(a_url.get_host())
                    pattern_flag = current_model.getDetectFlag()
                    # Judge Host-model's pattern('Study ready' or 'Study...')
                    if pattern_flag == 'Study ready':
                        if detect_time_flag:
                            detect_time_flag = False
                            study_ready_time = datetime.datetime.now()
                            study_interval = study_ready_time - begin_time
                            study_record_num = record_num
                            print 'Study consuming: %s' % study_interval
                            print 'Study Record: %s' % study_record_num
                            print '\tUrl amount: %s\tDifferent url amount: %s\tDifferent sip: %s\t' % \
                                (current_model.getUrlAmount(), current_model.getDifUrlAmount(),
                                 current_model.getSipAmount())
                        # Detect whether the record is anomaly
                        anomaly_status = anomaly_detector.detect(a_url, current_model)
                        # If the record is detected to be anomaly
                        if anomaly_status['Result'] is True:
                            valid_flag = result_controller.isValid(a_url, anomaly_collector)
                            if valid_flag is True:
                                anomaly_writer.writeResult(a_url, anomaly_status)
                            else:
                                current_model.reStudy()
                    elif pattern_flag == 'Study...':
                        current_model.add_record(a_url)
                    else:
                        raise ValueError('Lancer says: pattern selection error.')
                record_num += 1
                if record_num % 10000 == 0:
                    print 'Record completed: ', record_num,
            else:
                detect_record_num = record_num - study_record_num
                end_time = datetime.datetime.now()
                detect_ready_time = end_time - study_ready_time
                print 'Detect Record: %s' % detect_record_num
                print "Detection consuming: %s" % detect_ready_time
                print 'Whole system test finished.'

        # Store the completed Host Model.
        with open(host_collector_address, 'wb') as save_host_collector:
            pickle.dump(host_collector, save_host_collector)

    except IndexError:
        print 'Lancer says:\n\tIt has error through the system.'
        raise
Ejemplo n.º 7
0
def main():
    """Crawl user profiles and append their names to per-gender text files.

    Starting from the id returned by ``get_current_url_id()``, each iteration
    loads the user's page, fetches the info page, and, when the info holds
    both 'name' and 'gender', appends the name to ``./male.txt`` or
    ``./female.txt``.  The entries of the first followed-people page are then
    added to ``UrlRecord`` and the advanced id is persisted with
    ``set_current_url_id``.  The code sleeps ``sleep_time`` seconds around
    the page requests.

    The loop ends when no record exists for the current id or when the page
    state contains 'HTTP Error 403: Forbidden'.  A user whose info-page URL
    or followed-people URL is None is skipped (the id is still advanced).
    """
    # Context managers close both output files even if a request raises.
    with open('./male.txt', 'a', encoding='utf-8') as fp_male, \
            open('./female.txt', 'a', encoding='utf-8') as fp_female:
        cookie_index = 1
        url_record = UrlRecord()
        current_url_id = get_current_url_id()

        sleep_time = 4

        while True:
            print('current cookie index is', cookie_index)
            cookie = get_cookie(cookie_index)
            print('current url id is', current_url_id)

            people_url = url_record.get(current_url_id)
            if people_url is None:
                print("people url is None")
                break

            url = people_url[0]

            people = PeoplePage(url, cookie)
            time.sleep(sleep_time)
            state = people.get_state()
            # Stop crawling once the site answers with 403.
            if state and state.find('HTTP Error 403: Forbidden') != -1:
                break

            info_url = people.get_people_info_page_url()
            if info_url is None:
                print('info url is None')
                current_url_id += 1
                set_current_url_id(current_url_id)
                continue

            print(info_url)
            info_page = PeopleInfoPage(info_url, cookie)
            time.sleep(sleep_time)
            info = info_page.get_people_info()

            if 'name' in info and 'gender' in info:
                name = info['name']
                if info['gender'] == u'男':  # male
                    fp_male.write(name)
                    fp_male.write('\n')
                    # Flush per entry so an interrupted crawl keeps its output.
                    fp_male.flush()
                elif info['gender'] == u'女':  # female
                    fp_female.write(name)
                    fp_female.write('\n')
                    fp_female.flush()

            # Store the users followed by the current user in the database.
            care_page_url = people.get_care_people_page_url()
            if care_page_url is None:
                print('care page url is None')
                current_url_id += 1
                set_current_url_id(current_url_id)
                continue

            care_people_page = CarePeoplePage(care_page_url, cookie)
            time.sleep(sleep_time)
            people_list = care_people_page.get_people_list(1)
            time.sleep(sleep_time)
            # Distinct loop name so the PeoplePage object is not shadowed.
            for followed in people_list:
                url_record.add(followed[1], followed[0])

            current_url_id += 1
            set_current_url_id(current_url_id)