예제 #1
0
파일: data.py 프로젝트: tke578/node-1
def query(params):
    """Look up posts matching the optional filters carried by ``params``.

    Each of the following keys is ignored when it is missing or falsy:

    * ``post_status`` -- JSON text, decoded with ``json.loads`` and matched
      with ``$in``.
    * ``post_time`` -- a ``%m-%d-%Y`` date; ``post_time`` must then lie
      strictly between ``"<Y-m-d> 00:00"`` and ``"<Y-m-d> 23:59"``.
    * ``description`` -- passed through unchanged as a ``$regex`` pattern.
    * ``last_doc_id`` -- only documents with a smaller ``_id`` are returned.

    Returns a dict with ``data`` (at most 50 records, ordered with
    ``sort('post_time', -1)``) and ``total_records`` (the length of ``data``).
    """
    criteria = {}

    if params.get('post_status', ''):
        criteria["post_status"] = {
            "$in": json.loads(params.get('post_status')),
        }

    if params.get('post_time', ''):
        print(params["post_time"])
        day = parse_date(date=params['post_time'], date_format="%m-%d-%Y")
        day_prefix = day.strftime("%Y-%m-%d")
        criteria["post_time"] = {
            "$gt": day_prefix + ' 00:00',
            "$lt": day_prefix + ' 23:59',
        }

    if params.get('description', ''):
        criteria["description"] = {'$regex': params.get('description')}

    if params.get('last_doc_id', ''):
        # Paging: continue below the last document the caller has seen.
        criteria["_id"] = {"$lt": ObjectId(params["last_doc_id"])}

    cursor = Posts().find(filter=criteria, view="CLIENT")
    cursor = cursor.sort('post_time', -1).limit(50)

    records = []
    for doc in cursor:
        if doc.get("_id", ''):
            doc["_id"] = get_object_id(doc)
        records.append(doc)

    return {"data": records, "total_records": len(records)}
예제 #2
0
 def from_namespace(args: argparse.Namespace) -> 'CliListArgs':
     """Build a ``CliListArgs`` from parsed command-line arguments.

     An explicit ``args.date`` takes precedence and is run through
     ``parse_date``.  Otherwise today's date is used, shifted by
     ``args.now`` days when ``args.now`` is truthy.
     """
     if args.date:
         return CliListArgs(parse_date(args.date))

     day = date.today()
     if args.now:
         day = day + timedelta(days=args.now)
     return CliListArgs(day)
예제 #3
0
def static_computations_1(data):
    """Fill ``data['datetimes']`` and ``data['matplotlib_dates']``.

    Every entry of ``data['date_strings']`` is parsed with
    ``common.parse_date``; the parsed value and its
    ``matplotlib.dates.date2num`` conversion are appended, in order, to the
    two lists already stored in ``data``.  Nothing is returned.
    """
    print('computing data 1')

    # Turn each date string into its parsed and matplotlib representations.
    for raw_date in data['date_strings']:
        parsed = common.parse_date(raw_date)
        data['datetimes'].append(parsed)
        data['matplotlib_dates'].append(matplotlib.dates.date2num(parsed))
예제 #4
0
    def custom_date_parser(value):
        """Parse ``value`` with ``common.parse_date`` and cap the year at 2100.

        Returns ``None`` when parsing yields a falsy result (for example an
        empty value).  A result whose year is later than 2100 is replaced by
        a ``datetime`` for the same month and day in 2100; as before, the
        time-of-day part is not carried over in that case.
        """
        d = common.parse_date(value)
        if not d:
            # If this was not a date - and instead was something empty - return None.
            return None

        if d.year > 2100:
            # 2100 is not a leap year, so 29 February has no counterpart
            # there.  Fall back to 28 February instead of letting
            # datetime() raise ValueError.
            day = 28 if (d.month, d.day) == (2, 29) else d.day
            d = datetime(2100, d.month, day)
        return d
예제 #5
0
def parse_data(filename):
    """Extract test and contact-tracing figures from a PDF and print one CSV line.

    The PDF text is obtained with ``c.pdf_to_text`` and searched with a
    series of regular expressions.  The printed line has the columns::

        date,tot_tests,positivity_rate,isolated,quarantined,quarantined_travel,filename

    Falsy values (including ``None``) are printed as empty fields.

    Args:
        filename: path of the PDF to parse; also echoed as the last column.
    """
    txt = c.pdf_to_text(filename)

    # Timestamp: either "Stand <date time> Uhr" or separate "Stand:" and
    # "Zeit:" fields that are joined into one string.
    date_time = c.search(r'Stand (\d.*) Uhr', txt)
    if date_time is None:
        date_part = c.search(r'Stand\: (\d{2}\.\d{2}\.20\d{2})', txt)
        time_part = c.search(r'Zeit: (\d+:\d{2})', txt)
        if date_part is not None and time_part is not None:
            date_time = '{} {}'.format(date_part, time_part)
    report_date = c.parse_date(date_time)

    tot_tests = parse_pcr_tot_tests(txt)

    # Positivity rate: try the known phrasings in turn until one matches.
    positivity_rate = c.txt_to_float(
        c.search(r'Bei (\d+)% dieser Tests fiel das Resultat positiv aus',
                 txt))
    if positivity_rate is None:
        positivity_rate = c.txt_to_float(
            c.search(r'Positivit.tsrate( \*+| \(%\)|\*+)?\s+(\d\.?\d?)[%\s]',
                     txt,
                     index=2))
    if positivity_rate is None:
        positivity_rate = c.txt_to_float(
            c.search(r'Anteil positive Tests \(%\)(\d)?\s+(\d\.?\d?)[%\s]',
                     txt,
                     index=2))

    isolated = c.txt_to_int(
        c.search(
            r'(\d+)\s+(F.lle|Personen aufgrund einer laborbest.tigten COVID-19 Erkrankung)? in\sIsolation',
            txt,
            index=1))
    quarantined = c.txt_to_int(
        c.search(
            r'(\d+)\s?(in|Kontaktpersonen\sin\s.rztlich\sverordneter)? Quarant.ne',
            txt))
    quarantined_travel = None

    if isolated is None or quarantined is None:
        # Fall back to the "Total" row that follows the "Contact Tracing"
        # heading.
        pos = txt.find('Contact Tracing')
        # str.find returns -1 when the heading is absent; index 0 is a
        # valid hit.
        if pos >= 0:
            total_row = re.compile(
                r'Total\s?(\*+|\(%\))?\s+(\d+\s?\d+|\d+)\s+(\d+\s?\d+|\d+)\s+(\d+ ?\d+|\d+)?\n'
            )
            res = total_row.search(txt, pos)
            if res is not None:
                isolated = c.txt_to_int(res[2])
                quarantined = c.txt_to_int(res[3])
                # The fourth column is optional in the pattern, so its
                # group is None when the row has only two numbers.
                if res[4] is not None:
                    quarantined_travel = c.txt_to_int(res[4].strip())

    print('{},{},{},{},{},{},{}'.format(report_date, tot_tests or '',
                                        positivity_rate or '', isolated or '',
                                        quarantined or '',
                                        quarantined_travel or '', filename))
예제 #6
0
def _require(condition, message):
    """Raise ``AssertionError(message)`` unless ``condition`` is truthy.

    Used instead of the ``assert`` statement so the checks still run when
    Python is started with ``-O``.
    """
    if not condition:
        raise AssertionError(message)


def validate_parsed_data(appid, data):
    """Check that a scraped app record is complete and self-consistent.

    Args:
        appid: the app id that was queried.
        data: dict with a ``details`` and a ``reviews`` entry.

    Returns:
        ``(True, data)`` when every check passes, otherwise
        ``(False, (summary, traceback_text))`` where ``summary`` is
        ``"<exception type>, <exception message>"``.  Missing keys and wrong
        types are reported the same way as failed checks.  Exceptions that
        do not derive from ``Exception`` (``KeyboardInterrupt``,
        ``SystemExit``) are no longer swallowed.
    """
    try:
        # validate details
        details = data['details']
        _require(details['success'] == True, 'details success is false')
        game = details['data']
        _require(game, 'data is empty')

        release = game['release_date']
        _require(release, 'release_date is empty')
        # 'coming_soon' should always exist and is boolean.  It indicates the
        # game is not playable; it is False for early access games.
        _require(not release['coming_soon'], 'release date marked coming soon')
        _require(release['date'], 'release_date -> date is empty')
        # try to parse date
        _require(common.parse_date(release['date']),
                 'dateparser returned falsey result')
        # A "date is after 1980" sanity check is disabled because of a
        # dateparser parse bug:
        # https://github.com/scrapinghub/dateparser/issues/866

        _require(game['name'], 'name field is empty')
        # Some appids redirect to another appid; this removes the duplicates.
        _require(game['steam_appid'] == appid,
                 'appid does not match queried appid')

        # validate reviews
        reviews = data['reviews']
        _require(reviews['success'] == True, 'reviews success is false')
        summary = reviews['query_summary']
        _require(summary, 'query_summary does not exist')

        for field in ('total_positive', 'total_negative', 'total_reviews'):
            value = summary[field]
            # Exact type check on purpose: bool is a subclass of int and was
            # rejected by the original check as well.
            _require(type(value) is int,
                     'query_summary -> %s is not an int' % field)
            _require(value >= 0, 'query_summary -> %s is negative' % field)

        _require(
            summary['total_reviews'] ==
            summary['total_positive'] + summary['total_negative'],
            'total reviews does not equal sum of positive and negative reviews')

    except Exception as exc:
        return False, (', '.join([str(type(exc)), str(exc)]),
                       traceback.format_exc())

    else:
        return True, data
예제 #7
0
    def add_current_logentry(self):
        """Replace the stored record of the current revision with fresh data.

        Existing ``changed_path`` and ``revision`` rows for this repository
        URL and revision number are deleted first.  Then the revision row is
        inserted, followed by one ``changed_path`` row per ``(action, path)``
        pair in ``self.paths``.  Text values are UTF-8 encoded before being
        bound; a missing author is stored as an empty string.
        """
        rv_number = int(self.number)
        comment = self.msg
        timestamp = parse_date(self.date)
        author = self.author
        if author is None:
            author = ''

        # Clear out anything stored for this revision, children first.
        delete_statements = (
            '''
                delete from changed_path where
                    rv_repo_url = $url and rv_number = $number
            ''',
            '''
                delete from revision where
                    rv_repo_url = $url and rv_number = $number
            ''',
        )
        for statement in delete_statements:
            self.cursor.execute(
                statement, {'url': self.repo_url, 'number': rv_number})

        insert_revision_sql = '''
                insert into revision (
                    rv_repo_url,
                    rv_number,
                    rv_author,
                    rv_timestamp,
                    rv_comment)
                values (
                    $url,
                    $number,
                    $author,
                    $timestamp,
                    $comment)
            '''
        revision_row = {
            'url': self.repo_url,
            'number': rv_number,
            'author': author.encode('utf-8'),
            'comment': comment.encode('utf-8'),
            'timestamp': timestamp,
        }
        self.cursor.execute(insert_revision_sql, revision_row)

        insert_path_sql = '''
                insert into changed_path (
                    rv_repo_url,
                    rv_number,
                    cp_action,
                    cp_path)
                values (
                    $url,
                    $number,
                    $action,
                    $path)
            '''
        for action, path in self.paths:
            path_row = {
                'url': self.repo_url,
                'number': rv_number,
                'action': action.encode('utf-8'),
                'path': path.encode('utf-8'),
            }
            self.cursor.execute(insert_path_sql, path_row)
예제 #8
0
    def generate_html(self, cursor, with_links=True):
        """Render the general-statistics report for this repository as HTML.

        Queries the ``revision`` table for the smallest and largest revision
        number, the revision count and the first and last timestamps of
        ``self.repo_url``, then derives the repository age and the average
        number of commits per day, month (30 days) and year (365.25 days).

        A repository whose first and last revision are less than one day
        apart is treated as one day old for the averages, so they stay
        defined instead of dividing by zero.

        Args:
            cursor: database cursor used to run the query.
            with_links: forwarded to ``self.go_to_top_link``.

        Returns:
            The report as an HTML fragment string.
        """
        cursor.execute(
            '''
                select
                    min(rv_number),
                    max(rv_number),
                    count(rv_number),
                    min(rv_timestamp),
                    max(rv_timestamp)
                from revision where rv_repo_url = $url
            ''',
            {'url': self.repo_url})

        result = cursor.fetchall()[0]

        (min_rv_num, max_rv_num, rv_count, min_tstamp, max_tstamp) = result
        # Timestamps that come back as text are parsed before subtracting.
        if isinstance(min_tstamp, basestring):
            min_tstamp = parse_date(min_tstamp, datetime.datetime)
        if isinstance(max_tstamp, basestring):
            max_tstamp = parse_date(max_tstamp, datetime.datetime)
        age = max_tstamp - min_tstamp

        # Clamp to one day so the averages below never divide by zero.
        days = max(age.days, 1)
        months = days / 30.0
        years = days / 365.25

        avg_per_day = float(rv_count) / float(days)
        avg_per_month = float(rv_count) / float(months)
        avg_per_year = float(rv_count) / float(years)

        return '''
            <div class="report">
                <a id="%(anchor_name)s"></a>
                <h2>%(title)s</h2>
                %(go_to_top_link)s
                <p>
                    Repository URL: <b>%(url)s</b>.<br/>
                    Smallest revision number: %(smallest_rv_number)d.<br/>
                    Biggest revision number: %(biggest_rv_number)d.<br/>
                    Revision count: %(rv_count)d.<br/>
                    First revision date: %(first_rv_date)s.<br/>
                    Last revision date: %(last_rv_date)s.<br/>
                    Repository age: %(age)s.<br/>
                    Average number of commits per year: %(avg_per_year).2f<br/>
                    Average number of commits per month: %(avg_per_month).2f<br/>
                    Average number of commits per day: %(avg_per_day).2f.<br/>
                </p>
            </div>
        ''' % {
            'title': self.title,
            'url': self.repo_url,
            'smallest_rv_number': min_rv_num,
            'biggest_rv_number': max_rv_num,
            'rv_count': rv_count,
            'first_rv_date': min_tstamp,
            'last_rv_date': max_tstamp,
            'age': age,
            'avg_per_day': avg_per_day,
            'avg_per_month': avg_per_month,
            'avg_per_year': avg_per_year,
            'anchor_name': self.name,
            'go_to_top_link': self.go_to_top_link(with_links),
        }
예제 #9
0
def parse_celery(logging, line):
    """Turn one celery log line into a datadog counter metric.

    The line is tried against the known celery/beat messages in a fixed
    order: task succeeded, task received, scheduler sending a due task,
    writing entries, beat starting, schedule changed.  The first pattern
    that matches wins.  Any other line that still starts with a bracketed
    timestamp is reported as ``celery.error``, because only a single line is
    available here and the full traceback cannot be inspected.

    Args:
        logging: not used by this function; kept for the caller's interface.
        line: a single log line.

    Returns:
        ``(metric_name, timestamp, 1, attributes)`` where ``timestamp`` is
        the result of ``common.parse_date`` on the matched timestamp text,
        or ``None`` when the line matches none of the patterns.
    """
    # Every pattern starts with the same bracketed timestamp.
    timestamp_prefix = (
        r"\[(?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}.*?(\d+)).*"
    )

    # (event name, pattern tail, whether the task name is part of the metric)
    rules = (
        ("success",
         r"Task (?P<task>[\w\.]+)\[(?P<task_id>[\w\-]+)\] succeeded in (?P<duration>\d+\.\d+)s: (?P<msg>\S+$)",
         True),
        ("received",
         r"Received task: (?P<task>[\w\.]+)\[(?P<task_id>[\w\-]+)\]",
         True),
        ("sending",
         r"Scheduler: Sending due task (?P<task_name>[\w\-\.]+) \((?P<task>.*?)\)",
         True),
        ("writing", r"Writing entries\.\.\.", False),
        ("starting", r"beat: Starting\.\.\.", False),
        ("schedule_changed", r"DatabaseScheduler: Schedule changed\.", False),
        # Catch-all: a timestamped line that matched nothing above.
        ("error", r"", False),
    )

    for celery_event, tail, has_task in rules:
        match = re.match(timestamp_prefix + tail, line)
        if not match:
            continue

        event = match.groupdict()
        if has_task:
            metric_name = "celery.%s.%s" % (celery_event, event['task'])
        else:
            metric_name = "celery.%s" % celery_event

        return (metric_name,
            common.parse_date(event['timestamp']),
            1, # metric count
            {"metric_type": "counter", "unit": "request"}
        )

    return None