Beispiel #1
0
def scraper(url, resp) -> list:
    """Return the links extracted from a page that was fetched successfully.

    Args:
        url: The URL that was requested; passed on to ``extract_next_links``.
        resp: Response object. Only its ``status`` attribute is inspected
            here; the object itself is passed on to ``extract_next_links``.

    Returns:
        The result of ``extract_next_links(url, resp)`` when ``resp.status``
        equals 200, otherwise an empty list.
    """
    # Any status other than 200 produces no links at all.
    if resp.status != 200:
        return []

    found_links = extract_next_links(url, resp)
    # Invoked on every successful page; its return value is discarded.
    get_reports()
    return found_links
Beispiel #2
0
def exportcsv(bot, update):
    """Send the reports inside the chat's configured timeframe as a CSV file.

    The timeframe is read from the ``users`` entry of the chat. All reports
    returned by ``get_reports()`` are filtered through ``sort_reports``; the
    matches are handed to ``json_to_csv`` and the file ``<chat id>.csv`` is
    sent back to the chat. The file is removed after a successful send.

    Args:
        bot: Bot object; ``send_document`` is called on it.
        update: Incoming update. ``update.message`` provides the chat id and
            is used for all replies.

    Returns:
        None.
    """
    chat_id = update.message.chat.id
    try:
        users[chat_id]
    except KeyError:
        undefined(bot, update)
        # If the call above did not register the chat, there is no timeframe
        # to export. Carrying on would only raise KeyError on the lookups
        # below, so stop here instead.
        if chat_id not in users:
            return
    settings = users[chat_id]

    update.message.reply_text(botstring["exportcsv_start"])
    reports = get_reports()
    start = "%s%s%s" % (settings["begin_year"],
                        settings["begin_month"],
                        settings["begin_day"])
    end = "%s%s%s" % (settings["end_year"],
                      settings["end_month"],
                      settings["end_day"])
    reports_sorted = sort_reports(reports, start, end)
    report_count = len(reports_sorted)

    if report_count == 0:
        logging.info("For timeframe no reports")
        update.message.reply_text(botstring["exportcsv_no_reports"])
        return

    update.message.reply_text(botstring["exportcsv_info"] % (report_count))
    json_to_csv(reports_sorted, str(chat_id))
    update.message.reply_text(botstring["exportcsv"])

    csv_path = '%s.csv' % (str(chat_id))
    try:
        # The context manager closes the handle even when sending fails, and
        # guarantees it is closed before the file is removed.
        with open(csv_path, 'rb') as document:
            bot.send_document(chat_id=chat_id,
                              timeout=360,
                              document=document)
        logging.info("The file %s.csv was sent" % (str(chat_id)))
        os.remove(csv_path)
    except Exception:
        # Best effort: a failed delivery must not crash the handler, but the
        # cause is kept in the log instead of being discarded.
        logging.info("Sending of document not possible", exc_info=True)
Beispiel #3
0
def reports():
	"""Serve the list of reports, either as JSON or as the reports page.

	Returns:
		``jsonify(status=200, reports=...)`` when ``get_format(request)``
		is ``'json'``, otherwise the rendered ``reports.html`` template.
	"""
	all_reports = report_util.get_reports()

	wants_json = get_format(request) == 'json'
	if not wants_json:
		return render_template('reports.html', reports=all_reports)

	# JSON was requested explicitly.
	return jsonify(status=200, reports=all_reports)
Beispiel #4
0
def deliver_reports(contracts_path, reports_path,
        template, output_path, bucket=500, testing=False):
    """Queue all reports for delivery in batches of ``bucket`` items.

    The reports come from ``get_reports(contracts_path, reports_path)`` and
    are wrapped in a ``Popper``. Each batch popped from it is handed to
    ``push_reports.delay`` together with ``template``, ``output_path`` and
    ``testing``; the id of the returned job is logged along with the batch
    size and the number of items left on the popper.

    Args:
        contracts_path: Passed through to ``get_reports``.
        reports_path: Passed through to ``get_reports``.
        template: Passed through to ``push_reports.delay``.
        output_path: Passed through to ``push_reports.delay``.
        bucket: Size argument given to ``Popper.pop`` for every batch.
        testing: Passed through to ``push_reports.delay``.
    """
    popper = Popper(get_reports(contracts_path, reports_path))
    while True:
        batch = popper.pop(bucket)
        # A falsy batch signals that nothing is left to deliver.
        if not batch:
            break
        job = push_reports.delay(batch, template, output_path, testing)
        progress = (job.id, len(batch), len(popper.items))
        logger.info("Job id:%s | %s/%s" % progress)
Beispiel #5
0
def exportpdf(bot, update):
    """Send the reports inside the chat's configured timeframe as one PDF.

    The timeframe is read from the ``users`` entry of the chat. All reports
    returned by ``get_reports()`` are filtered through ``sort_reports``; each
    match is converted with ``html_to_pdf``, the results are combined with
    ``merge_pdf`` and ``add_footer``, and ``pdfs/<chat id>.pdf`` is sent back
    to the chat. The file is removed after a successful send.

    Args:
        bot: Bot object; ``send_document`` is called on it.
        update: Incoming update. ``update.message`` provides the chat id and
            is used for all replies.

    Returns:
        None.
    """
    chat_id = update.message.chat.id
    try:
        users[chat_id]
    except KeyError:
        undefined(bot, update)
        # If the call above did not register the chat, there is no timeframe
        # to export. Carrying on would only raise KeyError on the lookups
        # below, so stop here instead.
        if chat_id not in users:
            return
    settings = users[chat_id]

    print(settings)
    update.message.reply_text(botstring["exportpdf_start"])
    reports = get_reports()
    start = "%s%s%s" % (settings["begin_year"],
                        settings["begin_month"],
                        settings["begin_day"])
    end = "%s%s%s" % (settings["end_year"],
                      settings["end_month"],
                      settings["end_day"])
    reports_sorted = sort_reports(reports, start, end)
    report_count = len(reports_sorted)

    if report_count == 0:
        logging.info("For timeframe no reports")
        update.message.reply_text(botstring["exportpdf_no_reports"])
        return

    update.message.reply_text(botstring["exportpdf_info"] % (report_count))
    pdf_names = []
    for report in reports_sorted:
        link = report["report_link"]
        # The PDF name is the link without its first 46 characters and
        # without its last character.
        name = "%s.pdf" % (link[46:-1])
        pdf_names.append(name)
        html_to_pdf(link, name, False)
    merge_pdf(pdf_names, "pdfs", str(chat_id))
    add_footer("pdfs/%s" % (chat_id),
               "pdfs/%s" % (chat_id))
    update.message.reply_text(botstring["exportpdf"])

    pdf_path = 'pdfs/%s.pdf' % (str(chat_id))
    try:
        # The context manager closes the handle even when sending fails, and
        # guarantees it is closed before the file is removed.
        with open(pdf_path, 'rb') as document:
            bot.send_document(chat_id=chat_id,
                              timeout=360,
                              document=document)
        logging.info("The file pdfs/%s.pdf was sent" % (str(chat_id)))
        if os.path.exists(pdf_path):
            os.remove(pdf_path)
        else:
            logging.info("The file pdfs/%s.pdf does not exist" %
                         (str(chat_id)))
    except Exception:
        # Best effort: a failed delivery must not crash the handler, but the
        # cause is kept in the log instead of being discarded.
        logging.info("Sending of document not possible", exc_info=True)
Beispiel #6
0
def deliver_reports(contracts_path,
                    reports_path,
                    template,
                    output_path,
                    source,
                    data,
                    bucket=500):
    """Deliver all reports in batches of ``bucket`` items.

    ``source`` is resolved to an object by evaluating it as an expression.
    The reports come from ``get_reports(contracts_path, reports_path)`` and
    are wrapped in a ``Popper``; each batch popped from it is handed to
    ``push_reports`` together with ``template``, ``output_path``, the
    resolved source and ``data``.

    Args:
        contracts_path: Passed through to ``get_reports``.
        reports_path: Passed through to ``get_reports``.
        template: Passed through to ``push_reports``.
        output_path: Passed through to ``push_reports``.
        source: Expression string naming the source type.
        data: Passed through to ``push_reports``.
        bucket: Size argument given to ``Popper.pop`` for every batch.

    Returns:
        None. When ``source`` cannot be resolved the error is logged and
        nothing is delivered.
    """
    try:
        # NOTE(review): eval() executes arbitrary expressions. This is only
        # acceptable while ``source`` never comes from untrusted input;
        # consider an explicit name-to-class mapping instead.
        source_class = eval(source)
    except NameError:
        logger.error('%s source type not found' % str(source))
        # Without a resolved source there is nothing valid to hand to
        # push_reports. Continuing would fail on the unbound
        # ``source_class`` as soon as the first batch is pushed.
        return

    reports = get_reports(contracts_path, reports_path)
    popper = Popper(reports)
    pops = popper.pop(bucket)
    while pops:
        push_reports(pops, template, output_path, source_class, data)
        pops = popper.pop(bucket)
def update():
    """Write all current reports into a freshly created SQLite database.

    A database file with a random numeric name is created in the working
    directory, every item returned by ``get_reports()`` is inserted into its
    ``reports`` table inside one transaction, and the engine is disposed.

    Returns:
        The database file name. The name is returned even when the update
        failed; in that case the error has been logged and the file, if it
        was created, has been removed again.
    """
    logging.info("update started")
    # Chosen before the try block so the failure path below can always
    # refer to it.
    filename = "%s.db" % (str(random.randint(1, 999999999999999)))
    db = None
    try:
        logging.info("Created new db %s" % (filename))
        db = dataset.connect("sqlite:///%s" % (filename))
        reportsdb = db["reports"]
        db.begin()
        for report in get_reports():
            reportsdb.insert(report)
        db.commit()
        db.engine.dispose()
        logging.info("DB updated")
    except Exception as e:
        logging.error("Update() didnt' work; Error: %s" % (e))
        try:
            # Release the connection before the file is deleted.
            if db is not None:
                db.engine.dispose()
        finally:
            # The file only exists if the failure happened after the
            # database had been created.
            if os.path.exists(filename):
                os.remove(filename)
    return filename
Beispiel #8
0
def statistics(bot, update):
    """Reply with the statistics message filled in with the report count.

    Args:
        bot: Not used by this handler.
        update: Incoming update; the reply is sent via ``update.message``.
    """
    all_reports = get_reports()
    send_reply = update.message.reply_text
    template = botstring["statistics"]
    send_reply(template % (len(all_reports)))
Beispiel #9
0
def report(report_id):
	"""Render a single report, as an HTML page or as JSON.

	Steps, in order:

	1. Load the report and the list of dates; abort with 404 when the
	   report is falsy and with 500 when the date list is falsy.
	2. Narrow the dates to the report's ``minDate``/``maxDate`` window
	   (``maxDate`` is replaced by the latest date of ``maxDateMetric``
	   when that key is set) and to the ``datePattern`` regex.
	3. Read the ``start``/``end`` query arguments, resolve the ``latest``
	   and ``earliest`` aliases, fill in defaults, and abort with 400 when
	   a resolved date is not among the report's dates.
	4. Pick the histogram view when only ``start`` is set, otherwise the
	   timeseries view; fall back to the histogram when the report has
	   its timeseries explicitly disabled.
	5. Attach lens, view and per-metric ``similar_reports``/``enabled``
	   information to the report.

	The ``report`` dict and its metric dicts are modified in place.

	Args:
		report_id: Identifier handed to ``report_util.get_report``.

	Returns:
		A ``jsonify`` response when ``get_format(request)`` is ``'json'``,
		otherwise the rendered ``report/<viz>.html`` template.
	"""
	report = report_util.get_report(report_id)
	if not report:
		abort(404)

	dates = report_util.get_dates()
	if not dates:
		abort(500)

	min_date = report.get('minDate')
	max_date = report.get('maxDate')
	date_pattern = report.get('datePattern')
	max_date_metric = report.get('maxDateMetric')

	# TODO: If a report doesn't explicitly have a min/max date,
	# but all of its metrics do, take the min/max of the metrics
	# and set that as the report's implicit min/max date.

	# Omit dates for which this report has no data.
	# The slicing below treats index 0 as the most recent date
	# (presumably `dates` is ordered newest first; verify against
	# report_util.get_dates).
	# NOTE(review): assuming `dates` is a list, `index()` raises ValueError
	# when the min/max date is not an element of it; confirm that
	# cannot happen.
	if max_date_metric:
		max_date = report_util.get_latest_date(max_date_metric)
	if min_date:
		dates = dates[:dates.index(min_date) + 1]
	if max_date:
		dates = dates[dates.index(max_date):]
	if date_pattern:
		date_pattern = re.compile(date_pattern)
		dates = [d for d in dates if date_pattern.match(d)]

	report['dates'] = dates
	report['lenses'] = report_util.get_lenses()

	start = request.args.get('start')
	end = request.args.get('end')

	# Canonicalize single-date formats.
	# A lone `end` is treated as `start`, so a single date always ends
	# up in `start`.
	if end and not start:
		start, end = end, start

	# Canonicalize aliases.
	# NOTE(review): `dates` may be empty after the filtering above, in
	# which case the index lookups here and below raise IndexError.
	if start == 'latest':
		start = dates[0]
	elif start == 'earliest':
		start = dates[-1]
	if end == 'latest':
		end = dates[0]
	elif end == 'earliest':
		end = dates[-1]

	# This is longhand for the snapshot (histogram) view.
	if start == end:
		end = None

	# This is shorthand for the trends (timeseries) view.
	if not start and not end:
		# The default date range is 24 crawls (1 year).
		# May be shorter if the report's minimum date is more recent.
		start = dates[min(24, len(dates) - 1)]
		end = dates[0]

	# Reject dates that are not available for this report.
	if start and start not in dates:
		abort(400)
	if end and end not in dates:
		abort(400)

	# A start without an end selects the histogram; anything else is a
	# timeseries.
	viz = report_util.VizTypes.HISTOGRAM if (start and not end) else report_util.VizTypes.TIMESERIES

	# Fall back to the histogram when the report has a 'timeseries' entry
	# whose 'enabled' value is falsy.
	if viz == report_util.VizTypes.TIMESERIES and report.get('timeseries') and not report.get('timeseries').get('enabled'):
		end = None
		viz = report_util.VizTypes.HISTOGRAM

		# The default for histograms should be the latest date.
		if not request.args.get('start'):
			start = dates[0]

	lens_id = get_lens_id(request)
	lens = report_util.get_lens(lens_id)
	if lens:
		report['lens'] = lens

	report['view'] = get_report_view(report, request)

	# Determine which metrics should be enabled for this report.
	for metric in report['metrics']:
		# Get a list of reports that also contain this metric.
		metric['similar_reports'] = report_util.get_similar_reports(metric['id'], report_id)

		# Mark the lens used for this metric, if applicable.
		if lens:
			metric['lens'] = lens

		# Per-visualization settings of the metric; an empty dict is
		# stored when the metric has none. `viz` itself is the key.
		metric[viz] = metric.get(viz, {})
		enabled = metric[viz].get('enabled', True)
		# From here on min_date/max_date are the metric's own bounds,
		# defaulting to the requested start/end.
		min_date = metric[viz].get('minDate', start)
		max_date = metric[viz].get('maxDate', end)

		# Disabled metrics should stay that way.
		if not enabled:
			continue

		# Disable the metric if it start/end is outside of the min/max window.
		enabled = start >= min_date
		if max_date and enabled:
			enabled = start <= max_date
		if end and enabled:
			enabled = end <= max_date

		metric[viz]['enabled'] = enabled


	# Provide an absolute URL of this report as script root when the
	# request does not carry one.
	if not request.script_root:
		request.script_root = url_for('report', report_id=report_id, _external=True)

	# Return as JSON if requested.
	if get_format(request) == 'json':
		return jsonify(status=200, report=report, start=start, end=end, viz=viz)

	return render_template('report/%s.html' % viz,
						   viz=viz,
						   reports=report_util.get_reports(),
						   report=report,
						   start=start,
						   end=end)
Beispiel #10
0
def faq():
	"""Render the ``faq.html`` page with the report list and the FAQ helper."""
	all_reports = report_util.get_reports()
	return render_template('faq.html', reports=all_reports, faq=faq_util)
Beispiel #11
0
def about():
	"""Render the ``about.html`` page with the report list."""
	all_reports = report_util.get_reports()
	return render_template('about.html', reports=all_reports)
Beispiel #12
0
def index():
	"""Render the ``index.html`` page.

	The template receives the full report list, the featured reports and
	the FAQ helper.
	"""
	context = {
		'reports': report_util.get_reports(),
		'featured_reports': report_util.get_featured_reports(),
		'faq': faq_util,
	}
	return render_template('index.html', **context)
Beispiel #13
0
},
          content_security_policy_nonce_in=['script-src'])
def sitemap():
    """Serve the rendered ``sitemap.xml`` template with an XML mimetype.

    Returns:
        The response built by ``app.make_response`` with its mimetype set
        to ``text/xml``.
    """
    # Drop the nonce attribute from the request before rendering.
    del request.csp_nonce
    body = render_template('sitemap.xml')
    response = app.make_response(body)
    response.mimetype = "text/xml"
    return response


if __name__ == '__main__':
    # Local entry point only. On Google App Engine the application is
    # served by Gunicorn instead; see entrypoint in app.yaml.

    update_config()
    report_util.get_reports()

    # Debug mode is the default because it auto-reloads on change, but it
    # cannot be backgrounded. Passing the 'background' command line argument
    #    python main.py background &
    # therefore selects non-debug mode.
    if sys.argv[1:2] != ['background']:
        logging.debug('Running in debug mode')
        app.run(host='0.0.0.0', port=8080, debug=True)
    else:
        logging.debug('Running in background mode')
        # Turn off HTTPS redirects (automatically turned off for debug)
        talisman.force_https = False
        app.run(host='0.0.0.0', port=8080)

# [END app]
Beispiel #14
0
def reports():
    """Render the ``reports.html`` page with the full report list."""
    all_reports = report_util.get_reports()
    return render_template('reports.html', reports=all_reports)
def report(report_id):
    """Render the page of a single report.

    Steps, in order:

    1. Load the report and the list of dates; abort with 404 when the
       report is falsy and with 500 when the date list is falsy.
    2. Narrow the dates to the report's ``minDate``/``maxDate`` window.
    3. Read the ``start``/``end`` query arguments, resolve the ``latest``
       and ``earliest`` aliases, fill in defaults, and abort with 400 when
       a resolved date is not among the report's dates.
    4. Pick the histogram view when only ``start`` is set, otherwise the
       timeseries view; fall back to the histogram when the report has its
       timeseries explicitly disabled.
    5. Attach ``similar_reports`` and the ``enabled`` flag to every metric.

    The ``report`` dict and its metric dicts are modified in place.

    Args:
        report_id: Identifier handed to ``reportutil.get_report``.

    Returns:
        The rendered ``report/<viz>.html`` template.
    """
    report = reportutil.get_report(report_id)
    if not report:
        abort(404)

    dates = reportutil.get_dates()
    if not dates:
        abort(500)

    min_date = report.get('minDate')
    max_date = report.get('maxDate')

    # TODO: If a report doesn't explicitly have a min/max date,
    # but all of its metrics do, take the min/max of the metrics
    # and set that as the report's implicit min/max date.

    # Omit dates for which this report has no data.
    # The slicing treats index 0 as the most recent date.
    if min_date:
        dates = dates[:dates.index(min_date) + 1]
    if max_date:
        dates = dates[dates.index(max_date):]

    report['dates'] = dates

    start = request.args.get('start')
    end = request.args.get('end')

    # Canonicalize single-date formats: a lone `end` is treated as `start`.
    if end and not start:
        start, end = end, start

    # Canonicalize aliases.
    if start == 'latest':
        start = dates[0]
    elif start == 'earliest':
        start = dates[-1]
    if end == 'latest':
        end = dates[0]
    elif end == 'earliest':
        end = dates[-1]

    # This is longhand for the snapshot (histogram) view.
    if start == end:
        end = None

    # This is shorthand for the trends (timeseries) view.
    if not start and not end:
        # The default date range is 24 crawls (1 year).
        # May be shorter if the report's minimum date is more recent.
        start = dates[min(23, len(dates) - 1)]
        end = dates[0]

    # Reject dates that are not available for this report.
    if start and start not in dates:
        abort(400)
    if end and end not in dates:
        abort(400)

    viz = reportutil.VizTypes.HISTOGRAM if (
        start and not end) else reportutil.VizTypes.TIMESERIES

    timeseries = report.get('timeseries')
    if (viz == reportutil.VizTypes.TIMESERIES and timeseries
            and not timeseries.get('enabled')):
        end = None
        viz = reportutil.VizTypes.HISTOGRAM

        # A snapshot that was not requested explicitly should show the
        # latest date, not the first date of the default trends range.
        if not request.args.get('start'):
            start = dates[0]

    # Determine which metrics should be enabled for this report.
    for metric in report['metrics']:
        # Get a list of reports that also contain this metric.
        metric['similar_reports'] = reportutil.get_similar_reports(
            metric['id'], report_id)

        # Per-visualization settings of the metric; an empty dict is stored
        # when the metric has none.
        metric[viz] = metric.get(viz, {})
        enabled = metric[viz].get('enabled', True)
        # From here on min_date/max_date are the metric's own bounds,
        # defaulting to the requested start/end.
        min_date = metric[viz].get('minDate', start)
        max_date = metric[viz].get('maxDate', end)

        # Disabled metrics should stay that way.
        if not enabled:
            continue

        # Disable the metric if start/end is outside of the min/max window.
        enabled = start >= min_date
        # Also needed in the snapshot view, where `end` is unset: a metric
        # whose data stops before `start` has nothing to show.
        if max_date and enabled:
            enabled = start <= max_date
        if end and enabled:
            enabled = end <= max_date

        metric[viz]['enabled'] = enabled

    # Provide an absolute URL of this report as script root when the request
    # does not carry one.
    if not request.script_root:
        request.script_root = url_for('report',
                                      report_id=report_id,
                                      _external=True)

    return render_template('report/%s.html' % viz,
                           viz=viz,
                           reports=reportutil.get_reports(),
                           report=report,
                           start=start,
                           end=end)