def _MX_RepTime(self):
    """Per-user mean reply latency (seconds) for carmaker accounts.

    For every user in self._usrset, walk each conversation the user
    replied in and measure the delay between a tweet by another account
    (a "mention") and the user's next tweet.  Per-user raw deltas are
    also stored in self.__rep_timediffs as a side effect.

    Returns:
        dict: {screen_name: mean reply time in seconds, or 0 when no
        reply delays were found for that user}.
    """
    res = {}
    self.__rep_timediffs = {}
    for us in self._usrset:
        convoReps = self._carmakerConvoReps[us]
        timeDiffs = []
        self.__rep_timediffs[us] = timeDiffs
        for cvId, reps in convoReps.items():
            convo = self._convos[cvId]
            mentionTw = None
            for i, tw in enumerate(convo):
                # Skip tweets without a timestamp — nothing to measure.
                if 'created_at' not in tw:
                    continue
                # The user's opening tweet of a conversation is not a reply.
                if tw['user']['screen_name'] == us and i == 0:
                    continue
                if tw['user']['screen_name'] != us:
                    # Remember the most recent tweet by someone else.
                    mentionTw = tw
                    continue
                # Tweet by the user: only measurable if a mention preceded it.
                if mentionTw is None:
                    continue
                st = parsedate(mentionTw['created_at'])
                end = parsedate(tw['created_at'])
                delta = end - st
                timeDiffs.append(delta.total_seconds())
                mentionTw = None
                # `reps` counts how many replies to account for in this convo.
                reps -= 1
                if reps == 0:
                    break  # Go to the next conversation
        res[us] = 0
        if len(timeDiffs) > 0:
            res[us] = mean(timeDiffs)
    return res
def parse_one_date_start_end(self, freetext_date, original_date):
    """Parse one free-text date string into a (startdate, enddate) pair.

    Decade strings such as "1950s" are expanded via parse_decade; every
    other string is parsed twice with defaults anchored at the first and
    last day of the year, so that partial dates span a full range.

    Raises self.UnparsableDateWarning when the string cannot be parsed.
    """
    if self.is_decade(freetext_date):
        # Special case like 1950s.
        startdate = self.parse_decade(freetext_date)
        enddate = self.parse_decade(freetext_date, end=True)
    else:
        # Default: all other cases.
        try:
            startdate = parsedate(freetext_date, self.PARSERINFO,
                                  default=datetime.date(datetime.MINYEAR, 1, 1),
                                  dayfirst=True)
            enddate = parsedate(freetext_date, self.PARSERINFO,
                                default=datetime.date(datetime.MINYEAR, 12, 31),
                                dayfirst=True)
        except (ValueError, TypeError):
            raise self.UnparsableDateWarning(original_date)
    logging.debug("parsed: %s : %s", startdate, enddate)
    return startdate, enddate
def _parse_game(self, date_string, time_string, home_team, away_team, location_string, game_id_string, playoffs, last_game):
    """Build a Game from schedule-row strings, inferring the season year."""
    # Combine the separately-given date and time into one datetime.
    game_time = datetime.datetime.combine(parsedate(date_string).date(),
                                          parsedate(time_string).time())
    # Year inference: this game must come after last_game; with no prior
    # game, assume it falls within roughly the next 3 months of today.
    base_year = last_game.time.year if last_game else datetime.date.today().year
    game_time = game_time.replace(year=base_year)
    if last_game:
        if game_time < last_game.time:
            game_time = game_time.replace(year=base_year + 1)
    elif game_time > datetime.datetime.now() + datetime.timedelta(days=90):
        game_time = game_time.replace(year=base_year - 1)
    # Determine the matchup relative to our own team.
    is_home = home_team == self._team_name
    opponent = away_team if is_home else home_team
    return Game(game_time, opponent, is_home, location_string,
                game_id_string, playoffs)
def update_events():
    """Sync the local Event table with facebook events from the last 30 days."""
    # Drop everything older than the window we care about.
    cutoff = date.today() - timedelta(days=30)
    logger.debug('Deleting old events:')
    Event.objects.filter(start_time__lt=cutoff).delete()
    # Pull from facebook until we hit an event we already know about.
    try:
        latest_event = Event.objects.latest()
    except ObjectDoesNotExist:
        latest_event = None
    for edict in request_events():
        logger.debug('Got event info for %s (%s)', edict['name'], edict['start_time'])
        if parsedate(edict['start_time']).date() < cutoff:
            break
        if latest_event and edict['id'] == latest_event.fbid:
            break
        e, created = Event.objects.get_or_create(
            fbid=edict['id'],
            start_time=parsedate(edict['start_time']),
            end_time=parsedate(edict['end_time']),
            name=edict['name'],
            location=edict['location'],
        )
        e.description = edict['description']
        # Match vendors named in the event text against known vendors.
        known_vendor_names = (v.name for v in Vendor.objects.only('name').all())
        matched_names = list(parse_event_vendors(edict, known_vendor_names))
        e.vendors = Vendor.objects.filter(name__in=matched_names)
        e.save()
def is_recording_time():
    """Check whether it is a time span where we record"""
    # No configured spans means "record continuously".
    if not REC_TIME_SPAN:
        return True
    for span in REC_TIME_SPAN:
        now = datetime.now()
        # Deliberately re-parse on every call instead of caching: the
        # parser anchors relative specs (e.g. a day of the week) to the
        # current date and time.
        begin = parsedate(span['start'])
        finish = parsedate(span['stop'])
        if begin > finish:
            # Span crosses midnight: inside if after start OR before stop.
            if now >= begin or now < finish:
                return True
        elif begin <= now < finish:
            # Normal span: inside if between start and stop.
            return True
    # Not inside any span — don't record.
    return False
def file_desc(file):
    """Build a description dict for *file*, including its archive locations."""
    locations = []
    rows = db.session.query(FilesLocations).filter_by(file_sha256=file.sha256)
    for row in rows:
        # row[4] holds one or more (begin .. end) timestamp ranges.
        for time_range in row[4]:
            locations.append({
                "archive": row[1],
                "suite": row[2],
                "component": row[3],
                "begin": parsedate(time_range[0]).strftime("%Y%m%dT%H%M%SZ"),
                "end": parsedate(time_range[-1]).strftime("%Y%m%dT%H%M%SZ"),
            })
    # NOTE: legacy fields (archive_name/first_seen) were intentionally
    # dropped from the payload; only "locations" carries that data now.
    return {
        "name": file.name,
        "path": file.path,
        "size": file.size,
        "locations": locations,
    }
def parse_file(file_url, header_loc):
    """Return 1 when the data source behind *file_url* was updated today, else 0.

    :param file_url: URL (or keyword-bearing URL) of the source spreadsheet
    :param header_loc: header row index passed to pandas when reading Excel
    """
    # match 2/4 digits, separator, 2/4 digits, optional separator, optional 2/4 digits
    date_regex = r'((\d{4}|\d{2}|\d{1})(\.|\-|\/)(\d{4}|\d{2}|\d{1})?(\.|\-|\/)?(\d{4}|\d{2}))'
    # Bug fix: previously max_date was only assigned on success, so a failed
    # district-level download raised NameError at the return statement.
    # Default to None so the final comparison simply reports "not updated".
    max_date = None
    if 'Demographics' in file_url:
        r = get('https://dshs.texas.gov/coronavirus/additionaldata/')
        soup = BeautifulSoup(r.text, 'lxml')
        parent = soup.find("a", {"title": "Case and Fatality Demographics Data "})
        date_text = parent.nextSibling.nextSibling.text
        max_date = parsedate(re.search(date_regex, date_text).group(0))
    elif 'district-level' in file_url:
        # URL updates weekly: if pandas can read it and the row count looks
        # plausible, treat the file as updated today.
        try:
            df = pd.ExcelFile(file_url, engine='xlrd').parse(sheet_name=0, header=header_loc)
            if len(df.index) > 1000:
                max_date = today.date()
        except Exception:
            # Narrowed from a bare `except:`; a download/parse failure just
            # means "no update detected".
            pass
    else:
        df = pd.ExcelFile(file_url, engine='xlrd').parse(sheet_name=0, header=header_loc)
        df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
        date_text = list(df.columns)[-1]
        max_date = parsedate(re.search(date_regex, date_text).group(0)).date()
    return 1 if max_date == today.date() else 0
def get_settings(self, node):
    """Populate poll settings (title, id, dates, template, users) from *node*.

    Each XML element is now looked up once instead of twice (the previous
    version called ``node.find`` twice per element).  Raises AttributeError
    when a mandatory element is missing.
    """
    title = node.find(ns + "title")
    if title is None:
        raise AttributeError("No title!")
    self.title = title.text

    ident = node.find(ns + "id")
    if ident is None:
        raise AttributeError("No id!")
    self.id = ident.text

    # Opening/closing dates are optional.
    opening = node.find(ns + "opening-date")
    self.opening = parsedate(opening.text) if opening is not None else None

    closing = node.find(ns + "closing-date")
    self.closing = parsedate(closing.text) if closing is not None else None

    template = node.find(ns + "template-file")
    if template is None:
        raise AttributeError("No template file!")
    self.template = template.text

    user_file = node.find(ns + "user-file")
    if user_file is None:
        raise AttributeError("No users file!")
    # Prefer a pre-built users XML next to the poll; otherwise convert
    # the configured CSV.
    fn = "%s.users.xml" % self.id
    if os.path.exists(fn):
        self.users = etree.parse(fn)
    else:
        self.users = get_users_xml_from_csv(user_file.text)
def parse_start_end(self, start, end, original_date):
    """Parse separate start/end date strings into date objects.

    Sets self.IS_APPROXIMATE when the end date is not an exact day
    (e.g. "November 1945" rather than "07/11/1945").  Raises
    self.UnparsableDateWarning when either string cannot be parsed.
    """
    try:
        startdate = parsedate(start, self.PARSERINFO,
                              default=datetime.date(datetime.MINYEAR, 1, 1),
                              dayfirst=True)
        enddate = parsedate(end, self.PARSERINFO,
                            default=datetime.date(datetime.MINYEAR, 12, 31),
                            dayfirst=True)
    except (ValueError, TypeError):
        raise self.UnparsableDateWarning(original_date)
    # An exact date parses identically with either default; an approximate
    # one (month/year only) differs when anchored at the start of the year.
    probe = parsedate(end, self.PARSERINFO,
                      default=datetime.date(datetime.MINYEAR, 1, 1),
                      dayfirst=True)
    if probe != enddate:
        self.IS_APPROXIMATE = True
    return startdate, enddate
def check_file(file_name):
    """
    Check the given file for parse errors.

    :param file_name: file name to check
    :return: str error message, or None when the file is valid.
    """
    if file_name.endswith('.md'):
        parser = parse_md_file
        required_fields = REQUIRED_FIELDS
    elif file_name.endswith('.yml'):
        parser = parse_yml_file
        required_fields = REQUIRED_YAML_FIELDS
    else:
        return 'Unknown file type: %s' % file_name

    try:
        data = parser(file_name)
    except Exception as e:
        return str(e)

    # Hall-of-fame files get their own dedicated checker.
    if HOF_FILENAME_RE.search(file_name):
        return check_hof_data(data)

    if 'mfsa_id' not in data:
        return 'The MFSA ID must be in the filename or metadata.'

    for field in required_fields:
        if field not in data:
            return 'The {0} field is required in the file metadata.'.format(field)

    for fixed in data['fixed_in']:
        if "ESR" in fixed and "ESR " not in fixed:
            return ("When ESR is specified, it must be of the form "
                    "'Firefox ESR XX', not 'Firefox ESRXX' (Found '" + fixed + "')")

    if 'announced' in data:
        try:
            parsedate(data['announced']).date()
        except Exception:
            return 'Failed to parse "{}" as a date'.format(data['announced'])

    if not file_name.endswith('.yml'):
        return None

    # YAML advisories carry per-CVE metadata that must be complete.
    for cve, advisory in data['advisories'].items():
        if not CVE_RE.search(cve):
            return 'The cve field {0} does not appear to be valid.'.format(cve)
        for field in REQUIRED_YAML_ADVISORY_FIELDS:
            if field not in advisory:
                return 'The {0} field is required in the ' \
                       'file metadata for {1}.'.format(field, cve)
        for bug in advisory.get('bugs', ()):
            if 'url' not in bug:
                return 'There is a bug entry in {} without a "url" field.'.format(cve)
    return None
def post(self):
    """Ingest one DPU count record: update Redis counters, then log to Postgres.

    Expects a JSON body with at least 'dpu_id', 'direction' and 'timestamp'.
    Returns {'status': 'OK'} on success.
    """
    record = request.get_json(force=True)
    record['direction'] = int(record['direction'])
    r = redis.Redis(connection_pool=self.redis_pool)
    # TODO: verify timezone handling
    # TODO: use pub/sub to wrap the redis transaction and the postgres transaction?
    # begin redis transaction
    current_timestamp = parsedate(record['timestamp']).timestamp()
    previous_datetime = r.get('{}:timestamp'.format(record['dpu_id']))
    if previous_datetime:
        previous_timestamp = parsedate(previous_datetime).timestamp()
    else:
        # First record for this DPU: treat as in-order.
        previous_timestamp = current_timestamp
    transaction = r.pipeline()
    if previous_timestamp > current_timestamp:
        # Out-of-order arrival — schedule a reconciliation pass for this DPU.
        self._queue_reconciliation(record['dpu_id'], current_timestamp)
    spaces = self.dpu_spaces(record['dpu_id'])
    transaction.incr(record['dpu_id'])
    transaction.set('{}:timestamp'.format(record['dpu_id']), record['timestamp'])
    # incr result becomes this record's running count.
    record['count'], _ = transaction.execute()
    transaction = r.pipeline()
    for space in spaces:
        # The product of the space's and the record's direction decides
        # whether the space count is incremented or decremented.
        transaction.incr('{}:count'.format(space['space_id']),
                         space['direction'] * record['direction'])
    transaction.execute()
    # begin postgres transaction
    with self.pg.get_db_cursor(commit=True) as cursor:
        cursor.execute(DPU_LOG_SQL, record)
        cursor.execute(DPU_TELEM_SQL, record)
        # TODO: add handling for unmatched spaces
        for space in spaces:
            space_record = {
                'recorded': record['timestamp'],
                'space_id': space['space_id'],
                'dpu_id': record['dpu_id'],
                'count': record['count'] + space['direction'] * record['direction']
            }
            cursor.execute(SPACE_TELEM_SQL, space_record)
            print(cursor.query)
    return {'status': 'OK'}
def initialize_database():
    """Initialize the database with test data. All existing data is deleted."""
    # Station fixtures: (name, street, postal code, city, country).
    stations = (("Oper", "Operngasse 1", "1010", "Wien", u"Österreich"),
                ("Hohe Warte Stadium", "Heiligenstadt", "1190", "Wien", u"Österreich"),
                ("Fliegerhorst Brumowski", "Brumowskigasse 23", "3425", "Tulln an der Donau", u"Österreich"),
                ("FH Technikum Wien", "Höchstädtplatz 6", "1200", "Wien", u"Österreich"),
                ("Red Bull Ring", u"Schloßweg 1", "8724", "Spielberg", u"Österreich"))
    # Car fixtures: (manufacturer, type, color, seats, consumption, price, plate number).
    cars = (("Citroen", u"C3", "silber", 5, 4.8, 50, "W-997G"),
            ("Ford", u"Focus", "rot", 5, 5.9, 70, "W-997GH"),
            ("Smart", u"ForTwo", "gelb", 2, 3.5, 70, "W-997GI"),
            ("VW", u"Käfer", "Rost", 4, 6.8, 40, "W 992223"),
            ("Renault", "Grand Espace", "schwarz", 7, 8.8, 120, "K 009DF"),
            ("McLaren", "P1", "gelb", 2, 12.3, 190, "S 99823"))
    # Customer fixtures ("Kunden"): (name, street, postal code, city, country).
    kunden = (("Alice Amber", "Nussdorfer Strasse 77", "1090", "Wien", u"Österreich"),
              ("Bob Builder", "Lederwaschstrasse 2", "5589", "Tamsweg", u"Österreich"),
              ("Istvan Nagy", "Halasz utca 25", "9400", "Sopron", u"Ungarn"),
              ("Ignaz Zurbgriggen", "Wildbachstrasse 9", "8340", "Hinwil", u"Schweiz"),
              ("Charly Custer", "Albrechtgasse 530", "3571", "Gars am Kamp", u"Österreich"),
              ("Eve Easter", "Kardinal Piffl Platz 2", "3400", "Klosterneuburg", u"Österreich"))
    # Rebuild the schema from scratch, wiping any existing data.
    db.drop_all()
    db.create_all()
    for c in cars:
        d = dict(zip(('manufacturer', 'typ', 'color', 'seats', 'consumption', 'price', 'platenumber'), c))
        db.session.add(Car(**d))
    for s in stations:
        d = dict(zip(('name', 'street', 'plz', 'city', 'country'), s))
        db.session.add(Station(**d))
    for k in kunden:
        d = dict(zip(('name', 'street', 'plz', 'city', 'country'), k))
        db.session.add(Kunde(**d))
    # Flush so the rows exist before we link them to each other below.
    db.session.flush()
    c3, focus, fortwo, kaefer, espace, p1 = Car.query.all()
    oper, hohewarte, lale, fh, rbr = Station.query.all()
    alice, bob, istvan, ignaz, charly, eve = Kunde.query.all()
    # Park some cars at stations.
    c3.station = hohewarte
    focus.station = lale
    espace.station = hohewarte
    # Rental records (Leihe): two past rentals, two ongoing, one future.
    # NOTE(review): the Leihe instances are never add()ed to the session —
    # presumably persisted via relationship cascade from Kunde/Car; verify.
    Leihe(kunde=alice, car=c3, von=parsedate('2014-02-01 14:00'),
          bis=parsedate('2014-02-10 10:00'), returned=True,
          station_abhol=hohewarte, station_return=hohewarte)
    Leihe(kunde=alice, car=c3, von=parsedate('2014-02-14 08:00'),
          bis=parsedate('2014-02-15 22:00'), returned=True,
          station_abhol=hohewarte, station_return=lale)
    Leihe(kunde=ignaz, car=c3, von=datetime.today() - timedelta(days=2),
          bis=datetime.today() + timedelta(days=5), returned=False,
          station_abhol=hohewarte, station_return=rbr)
    Leihe(kunde=istvan, car=p1, von=parsedate('2014-01-14 09:00'),
          bis=datetime.today() + timedelta(days=2), returned=False,
          station_abhol=lale, station_return=fh)
    Leihe(kunde=charly, car=p1, von=datetime.today() + timedelta(days=10),
          bis=datetime.today() + timedelta(days=12), returned=False,
          station_abhol=lale, station_return=fh)
    db.session.commit()
def CutSeconds(twts, seconds):
    """Return (start_index, end_index) bounding the tweets whose elapsed
    time from the first tweet stays below *seconds*.

    end_index is the first tweet at or past the threshold, or the last
    index when the whole list fits within the window.
    """
    window_start = parsedate(twts[0]['created_at'])
    first = 0
    last = len(twts) - 1
    # Scan from the second tweet onward for the first one past the window.
    for idx, tweet in enumerate(twts[1:], start=1):
        elapsed = parsedate(tweet['created_at']) - window_start
        if elapsed.total_seconds() >= seconds:
            last = idx
            break
    return (first, last)
def _MX_RepTime(self,tw=None): if self._state == 'start': return if self._state == 'end': return if 'in_reply_to_status_id' not in tw or tw['in_reply_to_status_id'] is None: return "." end = parsedate(tw['created_at']) startw = self.__twts_dic.get(tw['in_reply_to_status_id']) if startw is None: return "." start = parsedate(startw['created_at']) dif = end - start return dif.total_seconds()
def download_eia_data(self, operation):
    """
    Check whether the EIA file on the server is newer than the local copy
    and download it when it is (or when no local copy exists).

    :param operation: 'reserves' or 'pricing' — selects URL and output file.
    :return: 1 in all handled cases (kept for caller compatibility).
    """
    # Bug fix: timestamps were previously compared as strftime('%s')
    # strings — '%s' is a non-standard, platform-dependent directive and
    # string comparison of timestamps is fragile.  Compare epoch floats.
    # Distant-past default so a redirect or missing header falls back to
    # "download".
    url_unix_time = parsedate('2000-01-01 00:00+00:00').timestamp()
    file_unix_time = url_unix_time
    if operation == 'reserves':
        url = self.EIA_DL_URL
        out = self.EIA_OUTFILE
    if operation == 'pricing':
        url = self.EIA_OIL_PRICE_URL
        out = self.EIA_OIL_PRICE_OUTFILE
    r = requests.head(url)  # , allow_redirects=True)
    print(f'RET: {r}')
    if r.status_code == 301:
        # Sometimes a permanent redirect is given; take the Location
        # header as the URL for the subsequent GET.
        url = r.headers['Location']
    else:
        url_date = parsedate(r.headers['last-modified'])
        url_unix_time = url_date.timestamp()
        print(int(url_unix_time))
    # Check to see if the file exists; if not, we must download.
    if os.path.isfile(out):
        file_unix_time = os.path.getmtime(out)
        print(f"Here: {datetime.fromtimestamp(file_unix_time)}")
    if url_unix_time >= file_unix_time or not os.path.isfile(out):
        response = requests.get(url)
        totalbits = 0
        if response.status_code == 200:
            with open(out, 'wb') as f:
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk:
                        totalbits += 1024
                        f.write(chunk)
            print(f"Downloaded {operation} file = ", totalbits * 1025, "KB...")
        return 1
    else:
        print("Local file is latest.")
        return 1
def test_feedPubDateNotOverriddenByEpisode(self):
    """Channel pubDate follows the episode until a feed date is set."""
    episode_date = datetime.datetime(2015, 1, 1, 15, 0, tzinfo=pytz.utc)
    self.fg.episodes[0].publication_date = episode_date
    pubDate = self.fg._create_rss().find("channel").find("pubDate")
    # With no explicit feed date, the channel uses the episode's date.
    assert pubDate is not None
    assert parsedate(pubDate.text) == episode_date
    custom_date = datetime.datetime(2016, 1, 2, 3, 4, tzinfo=pytz.utc)
    self.fg.publication_date = custom_date
    pubDate = self.fg._create_rss().find("channel").find("pubDate")
    # An explicitly-set feed date takes precedence over the episode's.
    assert pubDate is not None
    assert parsedate(pubDate.text) == custom_date
def on_order(self, rr): tlogger.debug(rr) #rr = r['result'] id = rr['clientOrderId'] status = rr['status'] rtype = rr['reportType'] o = None try: o = self.orders[id] except KeyError: pass symbol = self.convert_symbol(rr['symbol'],0) side = rr['side'] amount = float(rr['quantity']) filled = float(rr['cumQuantity']) remaining = amount-filled if status not in ('canceled','filled','suspended','expired') else 0 if o is None: #(status == "new") #"2017-10-20T12:29:43.166Z" dto = parsedate(rr['createdAt']).replace(tzinfo=None) ts = timestamp_ms(dto) price = float(rr['price']) self.add_order(id=id, symbol=symbol, side=side, price=price, amount=amount, timestamp=ts, remaining=remaining, filled=filled, params={'info': rr}) else: # Can the server send "canceled"/replaced message twice, in response # to both cancelOrder/cancelReplaceOrder and subscribeReports? # Worse yet, the "expired" may arrive sooner than update, # thus have to check that remaining is smaller than previous, and filled larger remaining = min(remaining, o['remaining']) filled = max(filled, o['filled']) if status in ('canceled','filled','suspended','expired'): self.update_order(id, 0, filled, params={'info': rr}) #self.log_error('received unregistered order {}'.format(id)) elif status in ('partiallyFilled','new'): self.update_order(id, remaining, filled, params={'info': rr}) else: #order self.log_error('received unknown order status. r: {}'.format(rr)) if rtype == 'trade': tid = rr['tradeId'] tprice = float(rr['tradePrice']) tamount = float(rr['tradeQuantity']) tdto = parsedate(rr['updatedAt']).replace(tzinfo=None) tts = timestamp_ms(tdto) #fee is negative when we are paid the rebate #NB! fee is always in payout currency fee = float(rr['tradeFee']) self.add_fill(id=tid, symbol=symbol, side=side, price=tprice, amount=tamount, timestamp=tts, order=id, fee=fee, params={'info': rr.copy()})
def _mongo_kv(schema_field, value): """ Returns a tuple of the MongoDB field name and validated value The the schema_field is multi_valued, value will always be returned as a list. :param schema_field dict: The schema for the field being evaluated, taken from DocStore schema.fields list :param value: Value to insert for this field. Value must be of the same type as specified in the schema_field, or the value must be able to be cast into that type. :return kv: Tuple of (mongo_field_name, mongo_value) :rtype tuple: """ if value is None: return schema_field['mongo_field'], None field_type = schema_field['type'] # Ensure multi_valued value is a list multi_valued = schema_field['multi_valued'] if multi_valued and not isinstance(value, list): value = [value] if field_type == 'int': value = [int(v) for v in value] if multi_valued else int(value) elif field_type == 'boolean': value = [bool(v) for v in value] if multi_valued else bool(value) # Datetime should be inserted into MongoDB as python datetime objects elif field_type == 'datetime': if type(value) is not datetime.datetime and not multi_valued: value = [parsedate(v) for v in value] if multi_valued else parsedate(value) # Location values should be inserted into MongoDB as two-element lists with x,y coordinates elif field_type == 'point': value = list(value)[:2] elif field_type == 'float': value = [float(v) for v in value] if multi_valued else float(value) # If multi_valued_type is "set", ensure no duplicates if multi_valued and \ schema_field['multi_valued_operation'] == 'replace' and \ schema_field['multi_valued_type'] == 'set': value = list(set(value)) return schema_field['mongo_field'], value
def on_new_order(self, item):
    """Handle a poloniex "n" (new order) push message."""
    # ["n", <currency pair id>, <order number>, <order type>, "<price>",
    #  "<amount>", "<date>", "<original amount ordered>", "<clientOrderId>"]
    # e.g. ["n", 148, 6083059, 1, "0.03000000", "2.00000000",
    #       "2018-09-08 04:54:09", "2.00000000", None]
    _, pair_id, oid, otype, price, remaining, tstr, amount, clientOrderId = item[:9]
    # api.create_order returns the id as a string, so normalize here too.
    oid = str(oid)
    symbol = self.id_to_symbol(pair_id)
    side = 'buy' if otype == 1 else 'sell'
    price = float(price)
    amount = float(amount)
    remaining = float(remaining)
    dto = parsedate(tstr)
    if dto.tzinfo is not None:
        logger2.error(
            'POLONIEX HAS CHANGED THEIR DATE FORMAT: {}, {}'.format(
                tstr, dto))
    ts = timestamp_ms(dto.replace(tzinfo=None))
    if oid in self.orders:
        self.update_order(id=oid, remaining=remaining)
    else:
        # filled starts at 0 because fills (and payout) arrive via
        # separate trade messages.
        self.add_order(id=oid, symbol=symbol, side=side, price=price,
                       amount=amount, timestamp=ts, remaining=remaining,
                       filled=0)
def on_fill(self, item):
    """Handle a poloniex "t" (trade/fill) push message."""
    # ["t", <trade ID>, "<rate>", "<amount>", "<fee multiplier>",
    #  <funding type>, <order number>, <total fee>, <date>,
    #  "<clientOrderId>", "<trade total>"]
    # funding type: 0 (exchange wallet), 1 (borrowed funds),
    #               2 (margin funds), 3 (lending funds)
    _, tid, price, amount, fee_rate, funding_type, oid = item[:7]
    tid = str(tid)
    oid = str(oid)
    price = float(price)
    amount = float(amount)
    fee_rate = float(fee_rate)
    # Trailing fields are optional — older messages omit them.
    total_fee = float(item[7]) if len(item) > 7 else None
    dto = parsedate(item[8]) if len(item) > 8 else dt.utcnow()
    if dto.tzinfo is not None:
        logger2.error(
            'POLONIEX HAS CHANGED THEIR DATE FORMAT: {}, {}'.format(
                item[8], dto))
    ts = timestamp_ms(dto.replace(tzinfo=None))
    self.add_fill(id=tid, symbol=None, side=None, price=price,
                  amount=amount, fee_rate=fee_rate, timestamp=ts, order=oid)
def _parse(self, raw_tiddler): ''' :type raw_tiddler: str :return: None ''' DIVIDER = '\n\n' idx_header_end = raw_tiddler.index(DIVIDER) header_lines = raw_tiddler[:idx_header_end] to_parse = { 'created': False, 'modified': False, 'tags': False, 'title': False, 'type': False, } for line_ in header_lines: line = line_.strip() for field_name in to_parse: entry_marker = '%s:' % field_name if not line.startswith(entry_marker): continue data = line.split(None, 1)[-1] if entry_marker in ('created', 'modified'): setattr(self, entry_marker, parsedate(data)) elif entry_marker == 'tags': self.tags = [tag.strip() for tag in data.split(',')] elif entry_marker == 'title': self.title = data elif entry_marker == 'type': self.type = data self.content = raw_tiddler[idx_header_end + 2:].decode('utf-8')
def download_to_file(url, file):
    """Download *url* into *file* only when the server copy is newer.

    Missing headers, a missing local file, or any other error all fall
    back to downloading (deliberate best-effort behavior).
    """
    caller = getmodulename(stack()[1][1]).upper()
    user_agent = {
        "User-agent":
        "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0"
    }
    try:
        head = requests.head(url, headers=user_agent)
        url_datetime = parsedate(head.headers['Last-Modified']).astimezone()
        file_datetime = datetime.datetime.fromtimestamp(
            path.getmtime(get_abspath_source_file(file))).astimezone()
        if url_datetime > file_datetime:
            logging.info(
                '{} File on server is newer, so downloading update to {}'.
                format(caller, get_abspath_source_file(file)))
            actual_download_to_file(url, file, user_agent)
        else:
            logging.info(
                '{} File on server is older, nothing to do'.format(caller))
    except KeyError as exc:
        # Server did not send Last-Modified — cannot compare, so download.
        logging.warning(
            '{} KeyError in the headers. the {} header was not sent by server {}. Downloading file'
            .format(caller, str(exc), url))
        actual_download_to_file(url, file, user_agent)
    except FileNotFoundError:
        # No local copy yet.
        logging.info("{} File didn't exist, so downloading {} from {}".format(
            caller, file, url))
        actual_download_to_file(url, file, user_agent)
    except Exception as exc:
        logging.warning('{} General exception occured: {}.'.format(
            caller, str(exc)))
        actual_download_to_file(url, file, user_agent)
def commit_datetimes_since(repository: Path, since: date, until_inclusive: date = None, author: str = 'Gilles') -> List[datetime]:
    """
    Return all commit datetimes authored since given date.

    Parameters
    ----------
    repository : Path
    since : date
    until_inclusive : date, optional
        Defaults to today when omitted.
    author : str, optional
        Author filter passed to ``git log --author``.  Previously this was
        hard-coded to 'Gilles'; the default keeps that behavior.

    Returns
    -------
    author_date : List[datetime]
    """
    if until_inclusive is None:
        until_inclusive = date.today()
    # %aI prints the author date in strict ISO-8601, one per line.
    log_output = subprocess.check_output(
        [
            'git', 'log', '--pretty=%aI', f'--author={author}',
            f'--since={since.isoformat()}',
            f'--until={until_inclusive.isoformat()}'
        ],
        cwd=str(repository),
        universal_newlines=True)
    return [parsedate(author_date) for author_date in log_output.splitlines()]
def normalize(field_name, field_value, field_metadata):
    """Validate and convert a raw string value according to field metadata.

    Returns the normalized value, or None when the value fails validation
    or conversion for the declared type.
    """
    field_value = field_value.strip()
    field_type = field_metadata['type']
    try:
        if field_type == 'Date':
            return parsedate(field_value).strftime('%Y-%m-%d')
        if field_type == 'Boolean':
            lowered = field_value.lower()
            if lowered in ('t', 'true', 'y', 'yes', '1'):
                return '1'
            if lowered in ('f', 'false', 'n', 'no', '0'):
                return '0'
            return None
        if field_type == 'Number':
            cast = int if field_metadata['numberType'] in ('integer', 'long') else float
            return cast(field_value)
        if field_type == 'StaticList':
            possible_values = field_metadata['values']
            if not possible_values:
                return field_value
            lowered = field_value.lower()
            # Match either the key or its display value, case-insensitively,
            # and always return the canonical key.
            for key, label in possible_values.items():
                if lowered in (key.lower(), label.lower()):
                    return key
            return None
        # Free-form field: apply the optional validation regex.
        pattern = field_metadata['validationRegExp']
        if pattern and not re.fullmatch(pattern, field_value):
            return None
        return field_value
    except ValueError:
        return None
def get_build_depends_timestamps(self):
    """
    Returns a dict with keys Debian archives and values lists of
    tuple(timestamp, pkgs)
    where pkgs is a list of packages living there
    """
    required_timestamps = {}
    for pkg in self.buildinfo.get_build_depends():
        if not pkg.timestamp:
            # Resolve the package's publication date lazily when missing.
            self.get_bin_date(pkg)
        # snapshot-mirror style timestamp, e.g. 20230101T120000Z
        timestamp = parsedate(pkg.timestamp).strftime("%Y%m%dT%H%M%SZ")
        location = f"{pkg.archive_name}+{pkg.suite_name}+{pkg.component_name}"
        required_timestamps.setdefault(location, {}).setdefault(timestamp, []).append(pkg)
        # We store timestamp value itself for the base mirror used for creating chroot
        self.buildinfo.required_timestamps.setdefault(location, []).append(timestamp)
    location_required_timestamps = {}
    for location, timestamps in required_timestamps.items():
        # sort by the number of packages found there, convert to list of tuples
        timestamps = sorted(timestamps.items(), key=lambda x: len(x[1]), reverse=True)
        location_required_timestamps[location] = timestamps
    return location_required_timestamps
def _loadAndSendData(self, sock, filePath, metricName):
    """Send the csv's (date, value) rows to *metricName* and return labels.

    As a side effect this creates the metric on the receiving end.

    :param sock: A connected socket object
    :param filePath: The csv with data to handle
    :param metricName: The target custom metric we will send data to
    :returns: list of numeric labels (int when possible, float otherwise)
    """
    labels = []
    for dttm, value, label in self._loadDataGen(filePath):
        # Graphite expects integer seconds since the epoch.
        timestamp = int(epochFromNaiveUTCDatetime(parsedate(dttm)))
        sock.sendall("%s %r %s\n" % (metricName, float(value), timestamp))
        # Keep the label for later use, preferring int over float.
        try:
            labels.append(int(label))
        except ValueError:
            labels.append(float(label))
    self.gracefullyCloseSocket(sock)
    return labels
def sync_files(remote_url: str, local_path: pathlib.Path):
    '''
    Sync remote url with local path, copy over modified time.

    Raises RuntimeError on any download failure, so callers can treat
    "file not written" uniformly.
    '''
    logging.info(f'Downloading: {remote_url}')
    try:
        session = _create_https_session()
        req = session.get(remote_url)
    except requests.exceptions.ContentDecodingError:
        logging.error(f"failed to decode {remote_url}")
        raise RuntimeError
    except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
        # Bug fix: this previously *returned* the RuntimeError class
        # instead of raising it, so callers saw a truthy value and assumed
        # the download succeeded.
        raise RuntimeError
    if req.status_code != 200:
        raise RuntimeError(f'{req.status_code} returned')
    local_path.parent.mkdir(parents=True, exist_ok=True)
    # Re-compress .gz targets locally; write everything else verbatim.
    if local_path.suffix.endswith('gz'):
        with gzip.open(local_path, 'wb') as f:
            f.write(req.content)
    else:
        with open(local_path, 'wb') as f:
            f.write(req.content)
    # Mirror the server's Last-Modified time onto the local file.
    modified_time = parsedate(req.headers['Last-Modified']).timestamp()
    access_time = datetime.utcnow().timestamp()
    os.utime(local_path, (access_time, modified_time))
def from_date_string(ldate):
    # type: (str) -> datetime.date
    """Parse *ldate* and return the corresponding UTC calendar date."""
    parsed = parsedate(ldate)
    # fix for python < 3.6: assume UTC when the parser returns naive
    if not parsed.tzinfo:
        parsed = parsed.replace(tzinfo=pytz.utc)
    return parsed.astimezone(pytz.utc).date()
def default(self, path, **kwargs):
    """Render the blog post identified by *path* with prev/next navigation."""
    listing = self.listing()
    if type(listing) != list:
        # cache is corrupt
        self.cache.delete("listing")
        listing = self.listing()
    listing.reverse()
    paths = [entry["path"] for entry in listing]
    if "/" + path not in paths:
        raise cherrypy.HTTPError(404, "File not found")
    idx = paths.index("/" + path)
    # Neighbors for navigation links, when they exist.
    prev_post = listing[idx - 1] if idx > 0 else None
    next_post = listing[idx + 1] if idx < len(listing) - 1 else None
    src = self.dropbox.read_file(path + ".md")
    mdown = get_markdown()
    html = mdown.convert(src)
    tpl_post = self.tpl.get_template("post.html")
    return tpl_post.render(body=html,
                           reading_time=reading_time(src),
                           page_title=mdown.Meta["title"][0],
                           date=parsedate(mdown.Meta["date"][0]),
                           prev_post=prev_post,
                           next_post=next_post,
                           todays_year=time.strftime("%Y"))
def default(self, path, **kwargs):
    """Render the blog post identified by *path* with prev/next navigation."""
    listing = self.listing()
    if type(listing) != list:
        # cache is corrupt
        self.cache.delete("listing")
        listing = self.listing()
    listing.reverse()
    paths = [entry["path"] for entry in listing]
    if "/" + path not in paths:
        raise cherrypy.HTTPError(404, "File not found")
    idx = paths.index("/" + path)
    # Neighbors for navigation links, when they exist.
    prev_post = listing[idx - 1] if idx > 0 else None
    next_post = listing[idx + 1] if idx < len(listing) - 1 else None
    src = self.dropbox.read_file(path + ".md")
    mdown = get_markdown()
    html = mdown.convert(src)
    tpl_post = self.tpl.get_template("post.html")
    return tpl_post.render(body=html,
                           reading_time=reading_time(src),
                           page_title=mdown.Meta["title"][0],
                           date=parsedate(mdown.Meta["date"][0]),
                           prev_post=prev_post,
                           next_post=next_post)
def get_summaries(section, taxons, no_cache):
    """ defines and eventually gets the assembly summary files"""
    summaries = set()
    for url, dest_file in make_urls(section, taxons):
        head = requests.head(url)
        url_date = parsedate(head.headers['last-modified'])
        # Only skip the download when a cached copy exists, caching is
        # allowed, and the remote copy is not newer than the local one.
        needs_download = True
        if os.path.exists(dest_file) and not no_cache:
            local_date = datetime.datetime.fromtimestamp(
                os.path.getmtime(dest_file), tz=datetime.timezone.utc)
            if url_date <= local_date:
                needs_download = False
        if needs_download:
            logging.getLogger().info(
                f"Downloading {section} summary file from: {url}")
            if not os.path.exists(os.path.dirname(dest_file)):
                os.mkdir(os.path.dirname(dest_file))
            with open(dest_file, "w") as dfs:
                dfs.write(requests.get(url).text)
            # Stamp the local file with the server's modification time so
            # future runs can compare against it.
            os.utime(dest_file,
                     (int(url_date.timestamp()), int(url_date.timestamp())))
        else:
            logging.getLogger().info(f"Using cached file: {dest_file}")
        summaries.add(dest_file)
    return summaries
def group_events_by_day(events):
    """Bucket events by the ISO-formatted timestamp parsed from their
    "date" field.

    :param events: iterable of dicts, each with a parseable "date" key.
    :return: mapping of ISO date string -> list of events for that day.
    """
    grouped = defaultdict(list)
    for ev in events:
        day_key = parsedate(ev["date"]).isoformat()
        grouped[day_key].append(ev)
    return grouped
def update_event(event):
    """Return a copy of *event* extended with ISO start/end timestamps.

    The "date", "start" and "end" fields are combined and interpreted
    in the Australia/Melbourne timezone.
    """
    melbourne = dateutil.tz.gettz("Australia/Melbourne")
    start = parsedate(event["date"] + " " + event["start"])
    end = parsedate(event["date"] + " " + event["end"])
    start = start.replace(tzinfo=melbourne)
    end = end.replace(tzinfo=melbourne)
    iso_fields = {"start_iso": start.isoformat(),
                  "end_iso": end.isoformat()}
    return {**event, **iso_fields}
def from_utc_string(dt):
    # type: (str) -> datetime
    """Parse *dt* and return it normalized to UTC.

    On Python < 3.6 the parse could come back naive; such values are
    assumed to already be in UTC before conversion.
    """
    parsed = parsedate(dt)
    if not parsed.tzinfo:
        # fix for python < 3.6: treat a naive result as UTC
        parsed = parsed.replace(tzinfo=pytz.utc)
    return parsed.astimezone(pytz.utc)
def is_modified(url, dstFile):
    """Return True if the remote resource at *url* is newer than *dstFile*.

    Issues a HEAD request and compares the server's Last-Modified header
    against the local file's modification time, both as epoch seconds.

    :param url: remote resource URL.
    :param dstFile: path of the local copy to compare against.
    :raises KeyError: if the response has no ``last-modified`` header.
    """
    r = requests.head(url)
    url_date = parsedate(r.headers['last-modified'])
    # os.path.getmtime already returns seconds since the epoch, so no
    # datetime round trip is needed; the stray debug print was removed.
    return url_date.timestamp() > os.path.getmtime(dstFile)
def get_datetime(txt_record):
    """Extract and parse the "Last update <date>." timestamp from a TXT record.

    :param txt_record: free-form record text.
    :return: the parsed datetime.  When no "Last update" marker is
        found, an empty string is handed to ``parsedate`` (preserving
        the original behavior, which lets the parser decide/raise).
    """
    # Raw string fixes the invalid "\." escape (a DeprecationWarning,
    # and a SyntaxWarning/error on newer Pythons).
    matches = re.findall(r'^.*Last update (.*)\.', txt_record)
    date = matches[0] if matches else ""
    return parsedate(date)
def cp(self, link: str):
    '''
    Copy file if it doesn't exist or if the remote modified time is
    newer than the current local version.
    '''
    logging.debug(f'Start copy on {link}')
    logging.debug(f'no parents: {self.no_parents}')
    # With no-parents semantics, strip the base URL first so the local
    # tree starts below it.
    target = link.replace(self.base_url, '') if self.no_parents else link
    split_result = urlsplit(target)
    logging.debug(split_result)
    topdir = split_result.netloc.replace('.', '_')
    outpath = pathlib.Path(self.download_location, topdir,
                           *split_result.path.split('/'))
    logging.debug(f'Checking {link} against {outpath}')
    if not outpath.is_file():
        # No local copy yet: fetch it (best effort).
        try:
            websync.sync_files(link, outpath)
        except RuntimeError:
            return None
        logging.debug(f"downloaded new file to {outpath}")
        return outpath
    if self.update_existing:
        head = self.session.head(link)
        remote_mtime = parsedate(head.headers['Last-Modified']).timestamp()
        if remote_mtime > outpath.stat().st_mtime:
            websync.sync_files(link, outpath)
            logging.debug(f"downloaded newer file to {outpath}")
            return outpath
    return None
def _loadAndSendData(self, sock, filePath, metricName):
    """ Returns the list of labels from the csv at filePath. Date and
    value fields are sent to the metric specified. As a side effect
    this creates the metric.

    :param sock: A connected socket object
    :param filePath: The csv with data to handle
    :param metricName: The target custom metric we will send data to
    """
    labels = []
    for (dttm, value, label) in self._loadDataGen(filePath):
        # Graphite expects integer seconds since the epoch.
        timestamp = int(epochFromNaiveUTCDatetime(parsedate(dttm)))
        sock.sendall("%s %r %s\n" % (metricName, float(value), timestamp))
        # Keep the label for later use, coerced to int when possible,
        # otherwise to float.
        try:
            labels.append(int(label))
        except ValueError:
            labels.append(float(label))
    self.gracefullyCloseSocket(sock)
    return labels
def grab(src, dest, name):
    """Download (or copy) *src* to *dest* unless the local copy is current.

    A local copy is kept when it is a valid zip file and at least as
    new as the remote resource (per its Last-Modified/Date headers).
    ``file://`` sources are copied locally instead of downloaded.

    :param src: source URL or ``file://`` path.
    :param dest: destination file path.
    :param name: human-readable name for log messages.
    """
    src, dest, name = map(str, (src, dest, name))
    logger.info(f" src, dest, name --> {src} {dest} {name}")
    if not os.path.exists(dest):
        logger.info(f"Downloading {name}")
    elif not zipfile.is_zipfile(dest):
        logger.info(f"Downloading {name} (corrupt file)")
    elif not src.startswith("file://"):
        r = requests.head(src)
        file_time = datetime.datetime.fromtimestamp(os.path.getmtime(dest))
        url_time = file_time
        for _k in ['last-modified', 'Date']:
            if _k in r.headers:
                url_time = r.headers[_k]
        url_date = parsedate(url_time)
        utc = pytz.utc
        url_date = url_date.replace(tzinfo=utc)
        file_time = file_time.replace(tzinfo=utc)
        if url_date < file_time:
            # Do not download if older than the local one
            return
        logger.info(f"Downloading updated {name}")
    # Local file does not exist or remote one is newer
    if src.startswith("file://"):
        src2 = src.replace("file://", '')
        if not os.path.exists(src2):
            logger.info(f"Source location ({src2}) does not exist")
        else:
            logger.info(f"Copying local file from {src2}")
            shutil.copyfile(src2, dest)
        return
    # Streaming download, so we can iterate over the response and
    # verify the transferred size.
    # NOTE(review): verify=False disables TLS certificate checks; keep
    # only if the hosts are known to have broken certificates.
    r = requests.get(src, stream=True, timeout=10, verify=False)
    total_size = int(r.headers.get('content-length', 0))
    logger.info(f"Requesting {src}")
    block_size = 1024
    wrote = 0
    # BUG FIX: download next to the destination instead of a fixed
    # "output.bin" in the working directory (which could clobber an
    # unrelated file and broke the post-move cleanup).
    tmp_path = dest + ".part"
    with open(tmp_path, 'wb') as f:
        for data in tqdm(r.iter_content(block_size),
                         total=math.ceil(total_size // block_size),
                         unit='KB', unit_scale=False):
            wrote += len(data)
            f.write(data)
    logger.info(f" total_size [{total_size}] / wrote [{wrote}] ")
    if total_size != 0 and wrote != total_size:
        # BUG FIX: expected/actual were swapped in the original
        # message; also discard the partial file instead of leaving it.
        logger.error(
            f"ERROR, something went wrong. Data could not be written. "
            f"Expected to write {total_size} but wrote {wrote} instead"
        )
        try:
            os.remove(tmp_path)
        except OSError:
            pass
    else:
        shutil.move(tmp_path, dest)
def _parse_page(self, html):
    """Extract (description, datetime, location) tuples from the page.

    The page is decoded as ISO-8859-1; each <event>'s <date> and <time>
    are joined and parsed into a single datetime.
    """
    text = html.decode('ISO-8859-1', 'ignore')
    pattern = (r'<event>.*?<date>(.*?)</date>.*?<time>(.*?)</time>.*?'
               r'<location>(.*?)</location>.*?'
               r'<description>(.*?)</description>.*?</event>')
    events = []
    for date_s, time_s, location, description in re.findall(
            pattern, text, re.DOTALL):
        events.append(
            (description, parsedate('%s %s' % (date_s, time_s)), location))
    return events
def download_logs(urls):
    """Download each log in *urls* into logs/, refreshing stale copies.

    An existing file is kept only when the server reports a
    Last-Modified date that is not newer than the local file; otherwise
    (remote newer, or no Last-Modified header) it is re-downloaded.
    """
    for url in urls:
        new_filename = "logs/" + data.filename_from_url(url)
        # Function-call print works identically on Python 2 and 3 for a
        # single argument (the original used the py2-only statement).
        print("Checking log " + new_filename)
        if not os.path.exists(new_filename):
            # No local copy: download unconditionally.
            download_log(url, new_filename)
            continue
        r = requests.head(url)
        refresh = True
        if "last-modified" in r.headers:
            # Compare both as naive datetimes (the no-op
            # pytz.UTC.localize calls of the original were removed:
            # their results were discarded).
            url_date = parsedate(
                r.headers['last-modified']).replace(tzinfo=None)
            file_time = datetime.datetime.fromtimestamp(
                os.path.getmtime(new_filename)).replace(tzinfo=None)
            # Only refresh when the remote copy is strictly newer.
            refresh = url_date > file_time
        if refresh:
            os.remove(new_filename)
            download_log(url, new_filename)
def load_time_span(span_string):
    """Try to load a time span, skip invalid ones.

    A span is two time specs separated by a dash, e.g. "9:00 - 17:30".
    Valid spans are appended to the global REC_TIME_SPAN list.
    """
    # Split into start and stop time string (separated by dash)
    m = re.match(r'^\s*(.+?)\s*-\s*(.+?)\s*$', span_string)
    # Quit if we didn't get a valid span.  print() with one argument is
    # valid on both Python 2 and 3 (original used the py2 statement).
    if not m:
        print("Invalid time span: specify two time specs separated by '-'")
        return
    # Check that both time specs specify a valid time.  A bare except
    # would also swallow KeyboardInterrupt/SystemExit; catching
    # Exception keeps the best-effort behavior without that hazard.
    for i in range(1, 3):
        try:
            parsedate(m.group(i))
        except Exception:
            print("Invalid time spec: %s" % m.group(i))
            return
    # Start and end time strings
    REC_TIME_SPAN.append({'start': m.group(1), 'stop': m.group(2)})
def coerce_to_date(x):
    """Coerce *x* into a datetime.

    Tuples are expanded into the datetime constructor, strings are
    parsed, datetimes pass through unchanged; anything else yields
    None.  (The isinstance checks cover disjoint types, so their order
    does not affect behavior.)
    """
    if isinstance(x, datetime):
        return x
    if isinstance(x, tuple):
        return datetime(*x)
    if isinstance(x, str):
        return parsedate(x)
    return None
def load_group(self, slug):
    """Load a user group definition from its XML file.

    Parses name, url, tags and the meeting schedule (either an iCal
    feed or a static list of meetings).  Meeting locations fall back to
    the group's default meeting location when enabled.

    :param slug: identifier of the group's XML definition.
    :return: a populated UserGroup.
    """
    root = self._load_xml(slug)
    group_name = root.find('name').text
    group_url = root.find('url').text.strip()
    _dml = root.find('defaultmeetinglocation')
    schedule = None
    _schedule = root.find('schedule')
    default_location = None
    if _dml is not None:
        # BUG FIX: the original dereferenced _schedule here before the
        # `_schedule is not None` check below, raising AttributeError
        # when <schedule> is absent but <defaultmeetinglocation> is not.
        use_dml = (_schedule is not None and
                   _schedule.get('usedefaultmeetinglocation', '') == 'true')
        if use_dml:
            default_location = self._extract_location(_dml)
    tags = []
    _tags = root.find('tags')
    for _tag in _tags.findall('tag'):
        tags.append(_tag.text.lower())
    if _schedule is not None:
        ical_feed = _schedule.find('ical')
        if ical_feed is not None:
            schedule = IcalSchedule(ical_feed.text)
        else:
            meetings = []
            for meeting in _schedule.findall('meeting'):
                time = parsedate(meeting.find('time').text).astimezone(utc)
                name = meeting.find('name').text
                description = None
                _description = meeting.find('description')
                if _description is not None:
                    description = self._normalize_description(
                        _description.text)
                url = group_url
                _url = meeting.find('url')
                if _url is not None:
                    url = _url.text.strip()
                # Fall back to the group default location when the
                # meeting has none of its own.
                location = self._extract_location(meeting.find('location'))
                location = location or default_location
                m = Meeting(name=name, time=time, description=description,
                            url=url, location=location)
                self.nominatim.resolve_coordinates(m)
                meetings.append(m)
            schedule = StaticSchedule(meetings)
    thumbnail = self._get_thumbnail(slug)
    return UserGroup(slug=slug, name=group_name, schedule=schedule,
                     url=group_url, default_location=default_location,
                     tags=tags, thumbnail=thumbnail)
def _UI_TimeTweet(self, tw=None):
    """Per-tweet feature: seconds since the same user's previous tweet.

    Driven by self._state: "start" resets the per-user last-tweet
    cache, "end" is a no-op.  Returns "." when no numeric value applies
    (unknown user, first tweet, duplicate id, or an identical tweet
    posted at the same instant).
    """
    if self._state == "start":
        self.__last_tweet = {us: None for us in self._usrset}
        return
    if self._state == "end":
        return
    usr = tw['user']['screen_name']
    if usr not in self._usrset:
        return "."
    cache = self.__last_tweet
    previous = cache[usr]
    cache[usr] = tw
    if previous is None or previous['id'] == tw['id']:
        return "."
    elapsed = parsedate(tw['created_at']) - parsedate(previous['created_at'])
    secs = elapsed.total_seconds()
    # Identical text at the same instant carries no timing signal.
    if secs == 0 and tw['text'] == previous['text']:
        return "."
    return secs
def check_file(file_name):
    """ Check the given file for parse errors.

    :param file_name: file name to check
    :return: str error message, or None when the file is valid.
    """
    if file_name.endswith('.md'):
        parser, required_fields = parse_md_file, REQUIRED_FIELDS
    elif file_name.endswith('.yml'):
        parser, required_fields = parse_yml_file, REQUIRED_YAML_FIELDS
    else:
        return 'Unknown file type: %s' % file_name
    try:
        data = parser(file_name)
    except Exception as e:
        return str(e)
    # Hall-of-fame files have their own validation path.
    if HOF_FILENAME_RE.search(file_name):
        return check_hof_data(data)
    if 'mfsa_id' not in data:
        return 'The MFSA ID must be in the filename or metadata.'
    missing = [f for f in required_fields if f not in data]
    if missing:
        # Report the first missing field, matching the original order.
        return 'The {0} field is required in the file metadata.'.format(missing[0])
    if 'announced' in data:
        try:
            parsedate(data['announced']).date()
        except Exception:
            return 'Failed to parse "{}" as a date'.format(data['announced'])
    if file_name.endswith('.yml'):
        for cve, advisory in data['advisories'].items():
            for field in REQUIRED_YAML_ADVISORY_FIELDS:
                if field not in advisory:
                    return 'The {0} field is required in the ' \
                           'file metadata for {1}.'.format(field, cve)
    return None
def get_time_between_tweets(twts, usrlst, condition=lambda x: True):
    """Average seconds between consecutive tweets per user.

    Only tweets authored by users in *usrlst* that satisfy *condition*
    are considered.  Users with fewer than two matching tweets map to 0.
    """
    gaps = {usr: [] for usr in usrlst}
    previous = {usr: None for usr in usrlst}
    members = set(usrlst)
    for tw in twts:
        author = tw['user']['screen_name']
        # Membership is checked before condition, as in the original.
        if author not in members or not condition(tw):
            continue
        prior, previous[author] = previous[author], tw
        if prior is None:
            continue
        delta = parsedate(tw['created_at']) - parsedate(prior['created_at'])
        gaps[author].append(delta.total_seconds())
    return {usr: (sum(gaps[usr]) / len(gaps[usr]) if gaps[usr] else 0)
            for usr in usrlst}
def _UI_TimeTweet(self, tw=None):
    """Accumulate per-user gaps between consecutive tweets.

    "start" resets state; "end" returns the per-user mean gap in
    seconds (0 when a user has no recorded gaps); otherwise the gap to
    the user's previous tweet is recorded.
    """
    if self._state == "start":
        self.__last_tweet = {us: None for us in self._usrset}
        self.__tweet_times = {us: [] for us in self._usrset}
        return
    if self._state == "end":
        times = self.__tweet_times
        return {us: (float(mean(times[us])) if times[us] else 0)
                for us in self._usrset}
    usr = tw['user']['screen_name']
    if usr not in self._usrset:
        return
    previous = self.__last_tweet[usr]
    self.__last_tweet[usr] = tw
    if previous is None:
        return
    delta = parsedate(tw['created_at']) - parsedate(previous['created_at'])
    self.__tweet_times[usr].append(delta.total_seconds())
def get_time_between_mentions(twts, usrlst):
    """Average seconds between consecutive tweets mentioning each user.

    For every user in *usrlst*, record the gap between successive
    tweets whose entities mention that user; return the per-user mean
    (0 when a user was mentioned fewer than two times).
    """
    gaps = {usr: [] for usr in usrlst}
    previous = {usr: None for usr in usrlst}
    members = set(usrlst)
    for tw in twts:
        mentioned = {m['screen_name']
                     for m in tw['entities']['user_mentions']}
        for cmaker in mentioned & members:
            prior, previous[cmaker] = previous[cmaker], tw
            if prior is None:
                continue
            delta = (parsedate(tw['created_at']) -
                     parsedate(prior['created_at']))
            gaps[cmaker].append(delta.total_seconds())
    return {usr: (sum(gaps[usr]) / len(gaps[usr]) if gaps[usr] else 0)
            for usr in usrlst}
def __init__(self, assets, weights=None, date=None):
    """Initializes the given portfolio with a list of assets and
    optionally, a weights vector and a reference date.

    :param assets: iterable of asset names, or a whitespace-separated
        string of names.
    :param weights: iterable of weights summing to at most 1.0;
        defaults to an equally weighted portfolio.
    :param date: reference date as a `datetime` or a parseable string;
        defaults to today at midnight.
    :raises TypeError: if assets/weights are not iterable, or the
        reference date is not a `datetime`.
    :raises ValueError: on empty assets, a length mismatch, or weights
        summing above 1.0.
    """
    # if assets are given inside a string, split them first
    if isinstance(assets, str):
        assets = assets.split()
    # cast assets to a list, whenever the object is iterable
    try:
        assets = [a for a in assets]
    except TypeError:
        raise TypeError("assets must to be an iterable object")
    assets_nr = len(assets)
    if not assets_nr > 0:
        raise ValueError("needed at least one asset, given {0}". \
            format(assets_nr))
    # if weights aren't given, assume it's equally weighted portfolio
    if weights is None:
        weights = [1.0/assets_nr for _ in assets]
    # cast weights to a list, whenever the object is iterable
    try:
        weights = [w for w in weights]
    except TypeError:
        raise TypeError("weights must to be an iterable object")
    if assets_nr != len(weights):
        raise ValueError("expecting a weights vector of length {0}". \
            format(assets_nr))
    weights_sum = sum(weights)
    if weights_sum > 1.0:
        # BUG FIX: the message referenced an undefined name
        # (weights_sm), so this path raised NameError instead of the
        # intended ValueError.
        raise ValueError("weights cannot exceed 1.0, but its sum is {0}". \
            format(weights_sum))
    # if date isn't given, assume it is today
    if date is None:
        date = dt.datetime.combine(dt.date.today(), dt.time())
    # if date is a string, try to parse it
    if isinstance(date, str):
        date = parsedate(date)
    # check if the type of reference date is `datetime`
    if not isinstance(date, dt.datetime):
        raise TypeError("expecting a `datetime` instance for " +
                        "the reference date")
    # initializes the internal data frame
    self._data = pd.DataFrame(np.array([weights]), columns=assets,
                              index=[date])
def _set_aws_auth(self):
    """ _set_aws_auth gets key, secret, token and expiration either
    from a file or from a temporary instance and sets them """
    creds = get_aws_creds(self.run_local)
    self._aws_key = creds.access_key_id
    self._aws_secret = creds.secret_access_key
    self._aws_token = creds.token
    # Expiration arrives as a string; keep it as a parsed datetime.
    self._aws_token_expiry = parsedate(creds.expiration)
def _EG_MentionTime(self, tw=None):
    """Track gaps between tweets mentioning each tracked user.

    "start" resets state; "end" returns the per-user mean gap in
    seconds (0 for users never mentioned twice); otherwise the gap from
    the previous mentioning tweet is recorded per mentioned user.
    """
    if self._state == "start":
        self.__last_mention = {us: None for us in self._usrset}
        self.__mention_times = {us: [] for us in self._usrset}
        return
    if self._state == "end":
        acc = self.__mention_times
        return {us: (float(sum(acc[us])) / len(acc[us]) if acc[us] else 0)
                for us in self._usrset}
    mentioned = {mn['screen_name']
                 for mn in tw['entities']['user_mentions']}
    targets = self._usrset.intersection(mentioned)
    if len(targets) == 0:
        return
    latter = parsedate(tw['created_at'])
    for cmaker in targets:
        prior = self.__last_mention[cmaker]
        self.__last_mention[cmaker] = tw
        if prior is None:
            continue
        gap = latter - parsedate(prior['created_at'])
        self.__mention_times[cmaker].append(gap.total_seconds())
def _MX_TimeReplies(self, tw=None):
    """Seconds between a user's consecutive replies/quotes.

    Only quote/reply tweets update the per-user cache, but the gap is
    reported for every tweet by a tracked user relative to the last
    cached reply.  Returns "." when no numeric gap applies.
    """
    if self._state == "start":
        self.__last_yesreply = {us: None for us in self._usrset}
        return
    if self._state == "end":
        return
    usr = tw['user']['screen_name']
    if usr not in self._usrset:
        return
    cache = self.__last_yesreply
    previous = cache[usr]
    # dict.get(...) is not None is equivalent to the original
    # "key in d and d[key] is not None" pair of checks.
    is_quote = tw.get('quoted_status_id_str') is not None
    is_reply = (tw.get('in_reply_to_screen_name') is not None or
                tw.get('in_reply_to_status_id') is not None)
    if is_quote or is_reply:
        cache[usr] = tw
    if previous is None or previous['id'] == tw['id']:
        return "."
    delta = parsedate(tw['created_at']) - parsedate(previous['created_at'])
    secs = delta.total_seconds()
    # Identical text at the same instant carries no timing signal.
    if secs == 0 and tw['text'] == previous['text']:
        return "."
    return secs
def test_feedPubDateUsesNewestEpisode(self):
    """The channel pubDate must equal the newest episode's date."""
    dates = [
        datetime.datetime(2015, 1, 1, 15, 0, tzinfo=pytz.utc),
        datetime.datetime(2016, 1, 3, 12, 22, tzinfo=pytz.utc),
        datetime.datetime(2014, 3, 2, 13, 11, tzinfo=pytz.utc),
    ]
    for episode, when in zip(self.fg.episodes, dates):
        episode.publication_date = when
    rss = self.fg._create_rss()
    pubDate = rss.find("channel").find("pubDate")
    assert pubDate is not None
    # Episode 1 carries the newest date, so it must win.
    assert parsedate(pubDate.text) == self.fg.episodes[1].publication_date
def _PT_Weekday(self, tw=None):
    """Per-tweet flag: 1 for a weekday tweet, 0 for a weekend tweet.

    "start" precomputes a day-rank table (stored on self but not used
    by this feature's return value); unknown users yield ".".
    """
    if self._state == "start":
        # Weekdays ranked by position, bucketed into 3 bins.
        rankWeek = [5, 6, 4, 3, 2, 1, 0]
        self.__dayRank = {day: int(pos * 3 / len(rankWeek))
                          for pos, day in enumerate(rankWeek)}
        return
    if self._state == "end":
        return
    if tw['user']['screen_name'] not in self._usrset:
        return "."
    # weekday() >= 5 means Saturday/Sunday.
    return 0 if parsedate(tw['created_at']).weekday() >= 5 else 1
def _PT_PeakHour(self, tw=None):
    """Fraction of each user's tweets posted during peak hours (16-19).

    "start" resets counters; "end" normalizes the peak-hour counts by
    the per-user totals tracked elsewhere (self.__tweet_count).
    """
    if self._state == 'start':
        self.__peakhr_tweets = {us: 0 for us in self._usrset}
        self.___peakhrs = set([16, 17, 18, 19])
        return
    if self._state == 'end':
        totals = self.__tweet_count
        peaks = self.__peakhr_tweets
        return {us: (float(peaks[us]) / totals[us] if totals[us] > 0 else 0)
                for us in self._usrset}
    us = tw['user']['screen_name']
    if us not in self._usrset:
        return
    if parsedate(tw['created_at']).time().hour in self.___peakhrs:
        self.__peakhr_tweets[us] += 1
def _PT_Weekday(self, tw=None):
    """Fraction of each user's tweets posted on weekdays.

    "start" resets counters; "end" divides the weekday count by the
    per-user totals tracked elsewhere (self.__tweet_count).
    """
    if self._state == "start":
        self.__weekday_count = {us: 0 for us in self._usrset}
        return
    if self._state == "end":
        counts = self.__weekday_count
        totals = self.__tweet_count
        return {us: (float(counts[us]) / float(totals[us])
                     if totals[us] > 0 else 0)
                for us in self._usrset}
    us = tw['user']['screen_name']
    if us not in self._usrset:
        return
    # weekday() >= 5 means Saturday/Sunday: count weekday tweets only.
    if parsedate(tw['created_at']).weekday() < 5:
        self.__weekday_count[us] += 1
def _parse_game(self, date_string, time_string, matchup_string,
                location_string):
    """Build a Game from schedule-table strings.

    Combines the date and time cells into one datetime, strips the
    playoff marker from the matchup, and determines the opponent and
    home/away side relative to self._team_name.
    """
    game_time = datetime.datetime.combine(parsedate(date_string).date(),
                                          parsedate(time_string).time())
    # Playoff games are prefixed with "(Pla) ".
    playoffs = matchup_string.startswith('(Pla) ')
    if playoffs:
        matchup_string = matchup_string[6:]
    # Matchup reads "<away> at <home>".
    teams = matchup_string.split(' at ')
    if teams[0] == self._team_name:
        opponent, is_home = teams[1], False
    else:
        opponent, is_home = teams[0], True
    return Game(game_time, opponent, is_home, location_string,
                playoffs=playoffs)
def _PT_PeakHour(self, tw=None):
    """Per-tweet flag: 1 when posted during hours 8-17, else 0.

    "start" also precomputes an hour-rank table (stored on self but
    unused by this feature's return value); unknown users yield ".".
    """
    if self._state == 'start':
        # Hours in rank order, bucketed into 2 bins; kept on self.
        hrRank = [9, 10, 8, 7, 11, 6, 5, 12, 4, 3, 2, 13, 1, 0, 14, 23,
                  15, 22, 21, 20, 19, 16, 18, 17]
        self.___hrsRanks = {hr: int(pos * 2 / len(hrRank))
                            for pos, hr in enumerate(hrRank)}
        self.___peakhrs = set(range(8, 18))
        return
    if self._state == 'end':
        return
    if tw['user']['screen_name'] not in self._usrset:
        return "."
    hour = parsedate(tw['created_at']).time().hour
    return 1 if hour in self.___peakhrs else 0