def query(params):
    """Build a MongoDB filter from request params and return up to 50 posts.

    Recognised keys in ``params`` (all optional, all strings):
      post_status  -- JSON-encoded list of statuses ($in match)
      post_time    -- date in %m-%d-%Y; matches the whole calendar day
      description  -- regex fragment matched against the description field
      last_doc_id  -- pagination cursor; only docs with _id below it

    Returns a dict {"data": [records...], "total_records": int}.
    """
    query_obj = dict()
    if params.get('post_status'):
        # post_status arrives as a JSON-encoded list of status strings.
        query_obj["post_status"] = {"$in": json.loads(params.get('post_status'))}
    if params.get('post_time'):
        # Match every post made on the requested calendar day.
        date = parse_date(date=params['post_time'], date_format="%m-%d-%Y")
        day = date.strftime("%Y-%m-%d")
        query_obj["post_time"] = {"$gt": day + ' 00:00', "$lt": day + ' 23:59'}
    if params.get('description'):
        query_obj["description"] = {'$regex': params.get('description')}
    if params.get('last_doc_id'):
        # Pagination: only documents older than the last one already seen.
        query_obj["_id"] = {"$lt": ObjectId(params["last_doc_id"])}
    mongo_client = Posts()
    collection = []
    # Newest first, capped at 50 records per page.
    for record in mongo_client.find(filter=query_obj, view="CLIENT").sort('post_time', -1).limit(50):
        if record.get("_id", ''):
            # Replace the raw ObjectId with its serialisable form.
            record["_id"] = get_object_id(record)
        collection.append(record)
    return dict(data=collection, total_records=len(collection))
def from_namespace(args: argparse.Namespace) -> 'CliListArgs':
    """Build a CliListArgs from parsed command-line arguments.

    Precedence: an explicit --date wins, then --now (an offset in days
    from today), otherwise today's date is used.
    """
    if args.date:
        target = parse_date(args.date)
    elif args.now:
        target = date.today() + timedelta(days=args.now)
    else:
        target = date.today()
    return CliListArgs(target)
def static_computations_1(data):
    """Parse every entry of data['date_strings'] and append the results.

    For each raw date string, appends the parsed datetime to
    data['datetimes'] and its matplotlib float representation to
    data['matplotlib_dates'].  Mutates ``data`` in place.
    """
    print('computing data 1')
    # convert date string to computer representation
    for raw_date in data['date_strings']:
        parsed = common.parse_date(raw_date)
        data['datetimes'].append(parsed)
        data['matplotlib_dates'].append(matplotlib.dates.date2num(parsed))
def custom_date_parser(value):
    """Parse ``value`` into a datetime, clamping years beyond 2100.

    Returns None when the input is empty or not a date; otherwise returns
    the parsed datetime, with the year capped at 2100 (month and day kept).
    """
    parsed = common.parse_date(value)
    # If this was not a date - and instead was something empty - return None.
    if not parsed:
        return None
    # Clamp implausible far-future dates to the year 2100.
    return datetime(2100, parsed.month, parsed.day) if parsed.year > 2100 else parsed
def parse_data(filename):
    """Extract COVID statistics from a PDF report and print them as CSV.

    Pulls the report timestamp, total PCR tests, positivity rate, and
    isolation/quarantine counts out of the PDF text, trying several
    regex variants per field (report layouts changed over time), then
    prints one comma-separated line:
    date,tot_tests,positivity_rate,isolated,quarantined,quarantined_travel,filename
    """
    txt = c.pdf_to_text(filename)
    # Timestamp: newer reports say "Stand ... Uhr"; older ones split
    # date and time across two labels.
    date_time = c.search(r'Stand (\d.*) Uhr', txt)
    if date_time is None:
        date = c.search(r'Stand\: (\d{2}\.\d{2}\.20\d{2})', txt)
        time = c.search(r'Zeit: (\d+:\d{2})', txt)
        if date is not None and time is not None:
            date_time = '{} {}'.format(date, time)
    date = c.parse_date(date_time)
    tot_tests = parse_pcr_tot_tests(txt)
    # Positivity rate: three layout variants, tried in order.
    positivity_rate = c.txt_to_float(
        c.search(r'Bei (\d+)% dieser Tests fiel das Resultat positiv aus', txt))
    if positivity_rate is None:
        positivity_rate = c.txt_to_float(
            c.search(r'Positivit.tsrate( \*+| \(%\)|\*+)?\s+(\d\.?\d?)[%\s]', txt, index=2))
    if positivity_rate is None:
        positivity_rate = c.txt_to_float(
            c.search(r'Anteil positive Tests \(%\)(\d)?\s+(\d\.?\d?)[%\s]', txt, index=2))
    isolated = c.txt_to_int(
        c.search(
            r'(\d+)\s+(F.lle|Personen aufgrund einer laborbest.tigten COVID-19 Erkrankung)? in\sIsolation',
            txt, index=1))
    quarantined = c.txt_to_int(
        c.search(
            r'(\d+)\s?(in|Kontaktpersonen\sin\s.rztlich\sverordneter)? Quarant.ne', txt))
    quarantined_travel = None
    if isolated is None or quarantined is None:
        # Fall back to the "Contact Tracing" table layout.
        pos = txt.find('Contact Tracing')
        if pos > 0:
            pcr = re.compile(
                r'Total\s?(\*+|\(%\))?\s+(\d+\s?\d+|\d+)\s+(\d+\s?\d+|\d+)\s+(\d+ ?\d+|\d+)?\n'
            )
            #pcr = re.compile(r'Total\s?(\*+|\(%\))?\s+(\d+)\s+(\d+)\s+(\d+|\d+\s?\d+)?')
            res = pcr.search(txt, pos)
            if res is not None:
                isolated = c.txt_to_int(res[2])
                quarantined = c.txt_to_int(res[3])
                # Group 4 is optional in the pattern; guard against None
                # before calling .strip() (previously an AttributeError).
                if res[4] is not None:
                    quarantined_travel = c.txt_to_int(res[4].strip())
    print('{},{},{},{},{},{},{}'.format(date, tot_tests or '', positivity_rate or '',
                                        isolated or '', quarantined or '',
                                        quarantined_travel or '', filename))
def validate_parsed_data(appid, data):
    """Sanity-check scraped Steam store data for ``appid``.

    Returns (True, data) when every check passes.  On any failure —
    assertion, missing key, wrong type — returns
    (False, (exc_summary, traceback_text)) describing the first problem.
    """
    try:
        # validate details
        assert data['details']['success'] == True, 'details success is false'
        assert data['details']['data'], 'data is empty'
        assert data['details']['data']['release_date'], 'release_date is empty'
        # field should always exist, and is boolean. Indicates game is not
        # playable. Is False for early access games.
        assert not data['details']['data']['release_date'][
            'coming_soon'], 'release date marked coming soon'
        assert data['details']['data']['release_date'][
            'date'], 'release_date -> date is empty'
        # try to parse date
        assert common.parse_date(
            data['details']['data']['release_date']
            ['date']), 'dateparser returned falsey result'
        # assert dateparser.parse(data['details']['data']['release_date']['date']) > dateparser.parse('1980'), 'dateparser returned too old date (parse bug)'
        # https://github.com/scrapinghub/dateparser/issues/866
        assert data['details']['data']['name'], 'name field is empty'
        # some appids redirect to another appid. This removes the duplicates
        assert data['details']['data'][
            'steam_appid'] == appid, 'appid does not match queried appid'

        # validate reviews
        assert data['reviews']['success'] == True, 'reviews success is false'
        assert data['reviews']['query_summary'], 'query_summary does not exist'
        # Bind once now that existence is asserted; KeyErrors on missing
        # sub-fields behave exactly as before.
        qs = data['reviews']['query_summary']
        assert type(qs['total_positive']
                    ) == int, 'query_summary -> total_positive is not an int'
        assert qs['total_positive'] >= 0, 'query_summary -> total_positive is negative'
        assert type(qs['total_negative']
                    ) == int, 'query_summary -> total_negative is not an int'
        assert qs['total_negative'] >= 0, 'query_summary -> total_negative is negative'
        assert type(qs['total_reviews']
                    ) == int, 'query_summary -> total_reviews is not an int'
        assert qs['total_reviews'] >= 0, 'query_summary -> total_reviews is negative'
        assert qs['total_reviews'] == qs['total_positive'] + qs[
            'total_negative'], 'total reviews does not equal sum of positive and negative reviews'
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate; any data-shape problem (AssertionError, KeyError,
        # TypeError, ...) is reported as a validation failure.
        return False, (', '.join([str(x) for x in sys.exc_info()[0:2]]),
                       traceback.format_exc())
    else:
        return True, data
def add_current_logentry(self):
    """Persist the currently buffered log entry to the database.

    Deletes any existing `changed_path` and `revision` rows for
    (self.repo_url, self.number) first, so re-processing the same
    revision is idempotent, then inserts one `revision` row and one
    `changed_path` row per entry in self.paths.

    Expects self.number, self.msg, self.date, self.author, self.paths
    and self.cursor to have been populated beforehand.
    """
    number = int(self.number)  # revision number arrives as a string
    msg = self.msg
    # NOTE(review): parse_date is defined elsewhere — presumably returns a
    # value suitable for the rv_timestamp column; confirm at its definition.
    date = parse_date(self.date)
    author = self.author
    if author is None:
        # Some revisions have no author; store an empty string instead.
        author = ''
    # Remove stale rows for this revision; child table (changed_path) first.
    self.cursor.execute(
        '''
        delete from changed_path
        where
            rv_repo_url = $url
            and rv_number = $number
        ''', {
            'url': self.repo_url,
            'number': number,
        })
    self.cursor.execute(
        '''
        delete from revision
        where
            rv_repo_url = $url
            and rv_number = $number
        ''', {
            'url': self.repo_url,
            'number': number
        })
    # Insert the revision itself.  Text fields are encoded to UTF-8 bytes
    # (Python 2 style) before being handed to the driver.
    self.cursor.execute(
        '''
        insert into revision (
            rv_repo_url,
            rv_number,
            rv_author,
            rv_timestamp,
            rv_comment)
        values (
            $url,
            $number,
            $author,
            $timestamp,
            $comment)
        ''', {
            'url': self.repo_url,
            'number': int(number),
            'author': author.encode('utf-8'),
            'comment': msg.encode('utf-8'),
            'timestamp': date
        })
    # One row per touched path, tagged with its action (A/M/D/R).
    for action, path in self.paths:
        self.cursor.execute(
            '''
            insert into changed_path (
                rv_repo_url,
                rv_number,
                cp_action,
                cp_path)
            values (
                $url,
                $number,
                $action,
                $path)
            ''', {
                'url': self.repo_url,
                'number': int(self.number),
                'action': action.encode('utf-8'),
                'path': path.encode('utf-8'),
            })
def generate_html(self, cursor, with_links=True):
    """Render an HTML report section summarising this repository's history.

    Queries the `revision` table for min/max revision numbers, revision
    count and first/last timestamps, derives the repository age and
    average commits per day/month/year, and returns the filled-in HTML
    fragment.  ``with_links`` is forwarded to go_to_top_link().
    """
    cursor.execute(
        '''
        select
            min(rv_number), max(rv_number), count(rv_number),
            min(rv_timestamp), max(rv_timestamp)
        from revision
        where rv_repo_url = $url
        ''', {'url': self.repo_url})
    result = cursor.fetchall()[0]
    (min_rv_num, max_rv_num, rv_count, min_tstamp, max_tstamp) = result
    # Some drivers return timestamps as strings (Python 2 basestring);
    # normalise to datetime before doing arithmetic on them.
    if isinstance(min_tstamp, basestring):
        min_tstamp = parse_date(min_tstamp, datetime.datetime)
    if isinstance(max_tstamp, basestring):
        max_tstamp = parse_date(max_tstamp, datetime.datetime)
    age = max_tstamp - min_tstamp
    # Clamp to at least one day so a repository whose whole history fits
    # in a single day no longer raises ZeroDivisionError below.
    days = max(age.days, 1)
    months = days / 30.0
    # A year is ~365.25 days (fixed: was mistyped as 356.25).
    years = days / 365.25
    avg_per_day = float(rv_count) / float(days)
    avg_per_month = float(rv_count) / float(months)
    avg_per_year = float(rv_count) / float(years)
    return '''
    <div class="report">
    <a id="%(anchor_name)s"></a>
    <h2>%(title)s</h2>
    %(go_to_top_link)s
    <p>
    Repository URL: <b>%(url)s</b>.<br/>
    Smallest revision number: %(smallest_rv_number)d.<br/>
    Biggest revision number: %(biggest_rv_number)d.<br/>
    Revision count: %(rv_count)d.<br/>
    First revision date: %(first_rv_date)s.<br/>
    Last revision date: %(last_rv_date)s.<br/>
    Repository age: %(age)s.<br/>
    Average number of commits per year: %(avg_per_year).2f<br/>
    Average number of commits per month: %(avg_per_month).2f<br/>
    Average number of commits per day: %(avg_per_day).2f.<br/>
    </p>
    </div>
    ''' % {
        'title': self.title,
        'url': self.repo_url,
        'smallest_rv_number': min_rv_num,
        'biggest_rv_number': max_rv_num,
        'rv_count': rv_count,
        'first_rv_date': min_tstamp,
        'last_rv_date': max_tstamp,
        'age': age,
        'avg_per_day': avg_per_day,
        'avg_per_month': avg_per_month,
        'avg_per_year': avg_per_year,
        'anchor_name': self.name,
        'go_to_top_link': self.go_to_top_link(with_links),
    }
def parse_celery(logging, line):
    """
    Attempt to match the line against the known celery log patterns.
    If a match is found, return a (metric_name, timestamp, count, attrs)
    tuple for datadog; otherwise return None.

    Note: the ``logging`` parameter shadows the stdlib module and is unused;
    it is kept for backward compatibility with existing callers.
    """
    # Raw strings keep the pattern text identical while avoiding
    # invalid-escape-sequence warnings.
    success_regex = r"\[(?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}.*?(\d+)).*Task (?P<task>[\w\.]+)\[(?P<task_id>[\w\-]+)\] succeeded in (?P<duration>\d+\.\d+)s: (?P<msg>\S+$)"
    received_regex = r"\[(?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}.*?(\d+)).*Received task: (?P<task>[\w\.]+)\[(?P<task_id>[\w\-]+)\]"
    sending_regex = r"\[(?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}.*?(\d+)).*Scheduler: Sending due task (?P<task_name>[\w\-\.]+) \((?P<task>.*?)\)"
    writing_regex = r"\[(?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}.*?(\d+)).*Writing entries\.\.\."
    starting_regex = r"\[(?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}.*?(\d+)).*beat: Starting\.\.\."
    schedule_changed_regex = r"\[(?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}.*?(\d+)).*DatabaseScheduler: Schedule changed\."
    error_regex = r"\[(?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}.*?(\d+)).*"

    attr_dict = {
        "metric_type": "counter",
        "unit": "request"
    }

    # (pattern, event name, whether the metric name includes the task name),
    # tried in priority order.  error_regex is the catch-all: any line that
    # starts with a timestamp but matched nothing else is counted as an
    # error (we only see one line, so no traceback context is available).
    checks = [
        (success_regex, "success", True),
        (received_regex, "received", True),
        (sending_regex, "sending", True),
        (writing_regex, "writing", False),
        (starting_regex, "starting", False),
        (schedule_changed_regex, "schedule_changed", False),
        (error_regex, "error", False),
    ]
    for regex, celery_event, with_task in checks:
        match = re.match(regex, line)
        if match is None:
            continue
        event = match.groupdict()
        if with_task:
            # could also return task name as an attribute?
            metric = "celery.%s.%s" % (celery_event, event['task'])
        else:
            metric = "celery.%s" % celery_event
        return (metric,
                common.parse_date(event['timestamp']),
                1,  # metric count
                attr_dict
                )
    # Line did not even carry a timestamp prefix: nothing to report.
    return None