Code Example #1
def test_shutdown_reason():
    data = parse(SHUTDOWN)
    assert data['shutdown_reason'] == 'Received SIGTERM twice'
    assert data['finish_reason'] == 'shutdown'

    data = parse(SHUTDOWN.replace('twice', ''))
    assert data['shutdown_reason'] == 'Received SIGTERM'
    assert data['finish_reason'] == 'shutdown'
Code Example #2
def test_only_stats_dumped():
    replaces = [
        ("'downloader/response_status_count/302': 1,",
         "'downloader/response_status_count/302': 7,\n 'downloader/response_status_count/301': 8,"),
        ("'response_received_count': 3,", "'response_received_count': 30,"),
        ("'item_scraped_count': 2,", "'item_scraped_count': 20,"),
        ("'log_count/ERROR': 5,", "'log_count/ERROR': 4,"),
        ("'finish_reason': 'finished',", "'finish_reason': 'forceshutdown',"),
    ]
    dict_count = dict(critical_logs=5,
                      error_logs=4,
                      warning_logs=3,
                      redirect_logs=15,
                      retry_logs=2,
                      ignore_logs=1)
    text = END
    for replace in replaces:
        text = text.replace(*replace)
    data = parse(text, headlines=50, taillines=50)
    # cst.json_dumps(data)
    assert data['first_log_time'] == '2018-10-23 18:29:41'
    assert data['latest_log_time'] == '2018-10-23 18:29:42'
    assert data['runtime'] == '0:00:01'
    assert data['datas'] == []
    assert data['pages'] == 30
    assert data['items'] == 20
    for k, v in data['latest_matches'].items():
        assert v == ''
    for k, v in dict_count.items():
        assert data['log_categories'][k]['count'] == v
        assert data['log_categories'][k]['details'] == []
    assert data['finish_reason'] == 'forceshutdown'
Code Example #3
File: test_parse.py Project: sulthonzh/logparser
def test_invalid_log():
    for text in ["", ERROR_404]:
        data = parse(text)
        cst.json_dumps(data)
        if not text:
            assert not (data['head'] or data['tail'])
        else:
            assert '404 - No Such Resource' in data['head'] and '404 - No Such Resource' in data['tail']

        assert set(data.keys()) == set(cst.PARSE_KEYS)
        for k in ['first_log_time', 'latest_log_time', 'runtime', 'shutdown_reason', 'finish_reason']:
            assert data[k] == cst.NA
        for k in ['first_log_timestamp', 'latest_log_timestamp', 'latest_crawl_timestamp', 'latest_scrape_timestamp']:
            assert data[k] == 0
        for k in ['pages', 'items']:
            assert data[k] is None
        # assert data['last_update_timestamp'] > 0  # 1546272001
        # assert len(data['last_update_time']) == 19  # "2019-01-01 00:00:01"
        assert cst.string_to_timestamp(data['last_update_time']) == data['last_update_timestamp']
        assert data['datas'] == []

        for v in data['latest_matches'].values():
            assert v == ''
        assert set(data['latest_matches'].keys()) == set(cst.LATEST_MATCHES_RESULT_DICT.keys())

        for v in data['log_categories'].values():
            assert v == dict(count=0, details=[])
        assert set(data['log_categories'].keys()) == set(cst.LOG_CATEGORIES_RESULT_DICT.keys())
Code Example #4
File: parse.py Project: zsy785622069/scrapydweb
    def dispatch_request(self, **kwargs):
        try:
            # Use io.open for compatibility with Python 2
            with io.open(os.path.join(self.PARSE_PATH, self.filename),
                         encoding='utf-8',
                         errors='ignore') as f:
                self.text = f.read()
        except Exception as err:
            return render_template(
                self.template_fail,
                node=self.node,
                alert="An error occurred when reading the uploaded logfile",
                text='%s\n%s' % (err.__class__.__name__, err))

        self.get_job_info()

        kwargs = dict(
            project=self.project,
            spider=self.spider,
            job=self.job,
            url_source=url_for('.source', filename=self.filename),
            # url_utf8=url_utf8, # To hide url_utf8 link in page http://127.0.0.1:5000/log/uploaded/ScrapydWeb_demo.log
        )
        kwargs.update(parse(self.text))
        # self.logger.debug("Parsed result: %s" % self.json_dumps(kwargs))
        return render_template(self.template, node=self.node, **kwargs)
Code Example #5
File: log.py Project: my8100/public-test
    def dispatch_request(self, **kwargs):
        if self.report_logparser:
            self.read_stats_for_report()
        # Try to request stats by LogParser to avoid reading/requesting the whole log
        if not self.logparser_valid and (self.stats_logparser
                                         or self.report_logparser):
            if self.IS_LOCAL_SCRAPYD_SERVER and self.LOCAL_SCRAPYD_LOGS_DIR:
                self.read_local_stats_by_logparser()
            if not self.logparser_valid:
                self.request_stats_by_logparser()

        if not self.logparser_valid and not self.text:
            # Try to read local logfile
            if self.IS_LOCAL_SCRAPYD_SERVER and self.LOCAL_SCRAPYD_LOGS_DIR:
                self.read_local_scrapy_log()
            # Have to request the Scrapy logfile
            if not self.text:
                self.request_scrapy_log()
                if self.status_code != 200:
                    if self.stats_logparser or self.report_logparser:
                        self.load_backup_stats()
                    if not self.backup_stats_valid:
                        if not self.report_logparser:
                            kwargs = dict(node=self.node,
                                          url=self.url,
                                          status_code=self.status_code,
                                          text=self.text)
                            return render_template(self.template_fail,
                                                   **kwargs)
            else:
                self.url += self.SCRAPYD_LOG_EXTENSIONS[0]
        else:
            self.url += self.SCRAPYD_LOG_EXTENSIONS[0]

        if (not self.utf8_realtime and not self.logparser_valid and self.text
                and self.status_code in [0, 200]):
            self.logger.warning('Parse the whole log')
            self.stats = parse(self.text)
            # Note that the crawler_engine is not available when using parse()
            self.stats.setdefault('crawler_engine', {})
            self.stats.setdefault('status', self.OK)

        if self.report_logparser:
            if self.stats and not self.stats.setdefault('from_memory', False):
                self.simplify_stats_for_report()
                self.keep_stats_for_report()
            get_flashed_messages()
            # 0, -1, 404 load backup
            if self.status_code < 100 or self.stats:
                status_code = 200
            else:
                status_code = self.status_code
            return self.json_dumps(self.stats or dict(status='error'),
                                   as_response=True), status_code
        else:
            self.update_kwargs()
            if self.ENABLE_MONITOR and self.POST:  # Only poll.py would make POST request
                self.monitor_alert()
            return render_template(self.template, **self.kwargs)
Code Example #6
File: log.py Project: zzzz123321/scrapydweb
    def update_kwargs(self):
        if self.utf8_realtime:
            self.kwargs['text'] = self.text
            self.kwargs['last_update_timestamp'] = time.time()
            if self.job_finished or self.job_key in self.job_finished_set:
                self.kwargs['url_refresh'] = ''
            else:
                self.kwargs['url_refresh'] = 'javascript:location.reload(true);'
        else:
            # Parsed data comes from json.loads; for compatibility with Python 2,
            # use str(time_) to avoid [u'2019-01-01 00:00:01', 0, 0, 0, 0] in JavaScript.
            if self.logparser_valid:
                for d in self.stats['datas']:
                    d[0] = str(d[0])
            else:
                self.logger.warning('Parse the whole log')
                self.stats = parse(self.text)
                # Note that the crawler_engine is not available when using parse()
                self.stats['crawler_engine'] = {}
            # For sorted orders in stats.html with Python 2
            for k in ['crawler_stats', 'crawler_engine']:
                if self.stats[k]:
                    self.stats[k] = self.get_ordered_dict(self.stats[k])

            if self.BACKUP_STATS_JSON_FILE:
                self.backup_stats()
            self.kwargs.update(self.stats)

            if (self.kwargs['finish_reason'] == self.NA
               and not self.job_finished
               and self.job_key not in self.job_finished_set):
                # http://flask.pocoo.org/docs/1.0/api/#flask.Request.url_root
                # _query_string = '?ui=mobile'
                # self.url_refresh = request.script_root + request.path + _query_string
                self.kwargs['url_refresh'] = 'javascript:location.reload(true);'
            if self.kwargs['url_refresh']:
                if self.stats_logparser and not self.logparser_valid:
                    self.kwargs['url_jump'] = ''
                else:
                    self.kwargs['url_jump'] = url_for('log', node=self.node, opt='stats', project=self.project,
                                                      spider=self.spider, job=self.job, with_ext=self.with_ext,
                                                      ui=self.UI, realtime='True' if self.stats_logparser else None)

        # Stats link of 'a.json' from the Logs page should hide these links
        if self.with_ext and self.job.endswith('.json'):
            self.kwargs['url_source'] = ''
            self.kwargs['url_opt_opposite'] = ''
            self.kwargs['url_refresh'] = ''
            self.kwargs['url_jump'] = ''
        else:
            self.kwargs['url_source'] = self.url
            self.kwargs['url_opt_opposite'] = url_for('log', node=self.node,
                                                      opt='utf8' if self.opt == 'stats' else 'stats',
                                                      project=self.project, spider=self.spider, job=self.job,
                                                      job_finished=self.job_finished, with_ext=self.with_ext,
                                                      ui=self.UI)
Code Example #7
def test_scrapy_fieldstats():
    data = parse(SCRAPY_FIELDSTATS)
    d = data['crawler_stats']
    assert d['fields_coverage'] == {
        u'Chinese 汉字': '50%',
        'author': {
            'a': 1,
            'b': 2
        }
    }
Code Example #8
def test_latest_scrape_item():
    data = parse(LATEST_SCRAPE_ITEM_ONE_LINE)
    d = data['latest_matches']
    latest_scrape = '2019-01-01 00:00:01 [scrapy.core.scraper] DEBUG: Scraped from <200 http://httpbin.org/get>'
    assert d['latest_scrape'] == latest_scrape
    assert d['latest_item'] == "{'item': 1}"

    data = parse(LATEST_SCRAPE_ITEM_MULTIPLE_LINES)
    d = data['latest_matches']
    latest_scrape = '2019-01-01 00:00:02 [scrapy.core.scraper] DEBUG: Scraped from <200 http://httpbin.org/get>'
    assert d['latest_scrape'] == latest_scrape
    assert json.loads(d['latest_item'].replace("'", '"')) == dict(item=2)

    data = parse(LATEST_SCRAPE_ITEM_MIXED)
    d = data['latest_matches']
    latest_scrape = '2019-01-01 00:00:03 [scrapy.core.scraper] DEBUG: Scraped from <200 http://httpbin.org/get>'
    assert d['latest_scrape'] == latest_scrape
    assert json.loads(d['latest_item'].replace("u'", "'").replace(
        "'", '"')) == dict(item={u'Chinese 汉字': 3})
Code Example #9
def test_telnet_info():
    data = parse(TELNET_160_DEFAULT)
    d = data['latest_matches']
    assert d['scrapy_version'] == '1.6.0'
    assert d['telnet_console'] == '127.0.0.1:6024'
    assert d['telnet_username'] == ''
    assert d['telnet_password'] == '9d3a29f17ee1bf9a'

    data = parse(TELNET_160_USERNAME)
    d = data['latest_matches']
    assert d['telnet_username'] == 'usr123'
    assert d['telnet_password'] == 'd24ad6be287d69b3'

    data = parse(TELNET_160_PASSWORD)
    d = data['latest_matches']
    assert d['telnet_username'] == ''
    assert d['telnet_password'] == '456psw'

    data = parse(TELNET_160_USERNAME_PASSWORD)
    d = data['latest_matches']
    assert d['telnet_username'] == 'usr123'
    assert d['telnet_password'] == '456psw'
Code Example #10
File: test_parse.py Project: sulthonzh/logparser
def test_demo_log():
    modified_logstats = FRONT.replace("Crawled 3 pages (at 0 pages/min), scraped 2 items (at 0 items/min)",
                                      "Crawled 1 pages (at 2 pages/min), scraped 3 items (at 4 items/min)")
    for case, text in zip(['without_stats_dumped', 'whole_log', 'modified_logstats'],
                          [FRONT, FRONT + END, modified_logstats + END]):
        data = parse(text, headlines=50, taillines=100)  # 180 lines in total
        # cst.json_dumps(data)

        if case == 'without_stats_dumped':
            cst.check_demo_data(data, without_stats_dumped=True)
        elif case == 'modified_logstats':  # to test update_data_with_crawler_stats()
            cst.check_demo_data(data, without_stats_dumped=False, modified_logstats=True)
        else:
            cst.check_demo_data(data, without_stats_dumped=False)
Code Example #11
def communicate(log_root):
    log_file = os.path.join(log_root, 'files.log')
    if not os.path.isfile(log_file):
        return

    LOG_FILE = parse(log_file)
    for line in LOG_FILE.context.itertuples():
        if is_nan(getattr(line, 'extracted', None)):
            continue

        local_name = line.extracted
        dump_path = os.path.join(DUMP_PATH, local_name)
        if not os.path.exists(dump_path):
            warnings.warn(f'No such file or directory: {local_name!r}',
                          ExtractWarning)
            return
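
The `is_nan` guard used above is not defined in this snippet (it reappears in Code Example #13). A minimal sketch of what such a helper might look like, assuming the `extracted` field can be missing as `None` or a pandas-style NaN; the implementation below is a hypothetical stand-in, not the project's own code:

import math

def is_nan(value):
    # Hypothetical helper: treat None and float NaN as "missing".
    if value is None:
        return True
    try:
        return math.isnan(value)
    except TypeError:
        # Non-numeric values (e.g. a filename string) count as present.
        return False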
Code Example #12
File: tests.py Project: croach/complenet_2010
def main():
	if len(sys.argv) < 2:
		sys.exit("Usage: %s [SOURCE_DIR|LOG_FILE]" % sys.argv[0])
	sourcedir = os.path.relpath(sys.argv[1])
	logs = list(parse(sourcedir))
	remove_redundant_authors(logs)
	network = build_network(sourcedir, logs)
	authors = [n for n in network if network.node[n]['type'] == 'author']
	projection = project_graph(network, authors)

	results = {
		'Random'      : decomp_by_random(network, projection, authors),
		'Commit Count': decomp_by_commit_count(network, projection, logs),
		'Degree'      : decomp_by_degree(network, projection, authors),
		'Closeness'   : decomp_by_closeness(network, projection, authors),
		'Betweenness' : decomp_by_betweenness(network, projection, authors),
		'Eigenvector' : decomp_by_eigenvector(network, projection, authors)
	}
	print_results(results)
Code Example #13
def generate_log(log_name):
    global DATE
    date = time.strftime('%Y-%m-%d')
    if date != DATE:
        archive(DATE)
        DATE = date
    INFO = os.path.join(LOGS_PATH, 'info', f'{DATE}.log')

    log_stem = log_name
    log_root = os.path.join(LOGS_PATH, log_name)
    log_uuid = re.match(
        r'.*?-(?P<uuid>[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})',
        log_stem, re.IGNORECASE).group('uuid')

    log_file = os.path.join(log_root, 'files.log')
    if not os.path.isfile(log_file):
        return

    LOG_FILE = parse(log_file)
    LOG_CONN = parse(os.path.join(log_root, 'conn.log'))
    for line in LOG_FILE.context.itertuples():
        if is_nan(getattr(line, 'extracted', None)):
            continue
        hosts = [
            dict(tx=ipaddress.ip_address(tx), rx=ipaddress.ip_address(rx))
            for (tx, rx) in zip(line.tx_hosts, line.rx_hosts)
        ]

        conns = list()
        is_orig = line.is_orig
        for conn_uid in line.conn_uids:
            record = next(
                LOG_CONN.context[lambda df: df.uid == conn_uid].iterrows())[1]  # pylint: disable=cell-var-from-loop
            if is_orig:
                conn = dict(
                    src_h=ipaddress.ip_address(record['id.orig_h']),
                    src_p=int(record['id.orig_p']),
                    dst_h=ipaddress.ip_address(record['id.resp_h']),
                    dst_p=int(record['id.resp_p']),
                )
            else:
                conn = dict(
                    src_h=ipaddress.ip_address(record['id.resp_h']),
                    src_p=int(record['id.resp_p']),
                    dst_h=ipaddress.ip_address(record['id.orig_h']),
                    dst_p=int(record['id.orig_p']),
                )
            conns.append(conn)

        local_name = line.extracted
        mime_type = None
        dump_path = os.path.join(DUMP_PATH, local_name)
        if os.path.exists(dump_path):
            with contextlib.suppress(Exception):
                mime_type = magic.detect_from_filename(dump_path).mime_type
            # if mime_type is None or MIME_REGEX.match(mime_type) is None:
            #     if MIME_MODE:
            #         local_name = rename_dump(local_name, line.mime_type)
            # else:
            #     if MIME_MODE or (mime_type != line.mime_type):  # pylint: disable=else-if-used
            #         local_name = rename_dump(local_name, mime_type)
        else:
            dump_path = None

        info = dict(timestamp=line.ts
                    if LOG_FILE.format == 'json' else line.ts.timestamp(),
                    log_uuid=log_uuid,
                    log_path=log_root,
                    log_name=log_stem,
                    dump_path=dump_path,
                    local_name=local_name,
                    source_name=getattr(line, 'filename', None),
                    hosts=hosts,
                    conns=conns,
                    bro_mime_type=line.mime_type,
                    real_mime_type=mime_type,
                    hash=dict(
                        md5=getattr(line, 'md5', None),
                        sha1=getattr(line, 'sha1', None),
                        sha256=getattr(line, 'sha256', None),
                    ))
        print_file(json.dumps(info, cls=IPAddressJSONEncoder), file=INFO)
Code Example #14
def test_latest_item_unicode_escape():
    text = (FRONT + END).replace("{'item': 2}",
                                 u"{u'Chinese \\u6c49\\u5b57': 2}")
    data = parse(text)
    assert data['latest_matches']['latest_item'] == u"{u'Chinese 汉字': 2}"
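
The logparser examples above (Code Examples #1-#10 and #14) all follow the same call pattern: pass the raw log text to parse() and inspect the returned stats dict. A minimal usage sketch under that assumption, with a hypothetical logfile path; the import is assumed to match the tests (from logparser import parse):

import io

from logparser import parse  # assumed import path, as exercised in the tests above

# Hypothetical Scrapyd-style logfile path.
logfile = 'logs/demo_project/test_spider/2018-10-23_182826.log'
with io.open(logfile, encoding='utf-8', errors='ignore') as f:
    text = f.read()

data = parse(text, headlines=50, taillines=100)
print(data['runtime'], data['pages'], data['items'], data['finish_reason'])
print(data['latest_matches']['latest_item'])
print({k: v['count'] for k, v in data['log_categories'].items()})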
Code Example #15
        tree = create_tree(scen=scene, domain_union=False, subset=sub, optimize_agents=True)
        save_tree(tree, tree_path(path_tmp, scene, sub))

        # for agent in ['AFB', 'SynchBB']:
        # iagt = 0
        for agent in agt_name.keys():
            # iagt += 1
            # if iagt >= agt_start:
            print('\t- agent : ' + agent)

            # running the problem
            # print('\t\t- running DCOP')
            cmd(path_tmp, scene, agent, sub)

            # parsing the result
            # print('\t\t- parsing the result')
            df = parse(log_path(path_tmp, scene, sub, agent), debug=False)
            time = df['time'][0]
            nagts = df['agts'][0]
            nmes = df['mes'][0]
            nbytes = df['bytes'][0]
            nrow = [sub, agent, time, nagts, nmes, int(nmes / nagts), nbytes, int(nbytes / nagts)]
            big_df.loc[big_df.shape[0]] = nrow

# writing the overall results
print('\n- writing all the results...')
big_df.to_csv('all-sub-incremental-3-15.csv')

print('\nall done.')