Example #1
 def _MX_RepTime(self):
     res = {}
     self.__rep_timediffs = {}
     for us in self._usrset:
         convoReps = self._carmakerConvoReps[us]
         timeDiffs = []
         self.__rep_timediffs[us] = timeDiffs
         for cvId, reps in convoReps.items():
             convo = self._convos[cvId]
             mentionTw = None
             for i, tw in enumerate(convo):
                 if 'created_at' not in tw: continue
                 if tw['user']['screen_name'] == us and i == 0: continue
                 if tw['user']['screen_name'] != us: 
                     mentionTw = tw
                     continue
                 if mentionTw is None: continue
                 st = parsedate(mentionTw['created_at'])
                 end = parsedate(tw['created_at'])
                 delta = end - st
                 timeDiffs.append(delta.total_seconds())
                 mentionTw = None
                 reps -= 1
                 if reps == 0: break # Go to the next conversation
         res[us] = 0
         if len(timeDiffs) > 0: res[us] = mean(timeDiffs)
     return res
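These snippets come from many different projects, but they all appear to use the same alias: parsedate is dateutil.parser.parse (and mean here would come from the statistics module). A minimal sketch of the reply-time arithmetic above, with hypothetical Twitter-style created_at values:

from statistics import mean
from dateutil.parser import parse as parsedate  # assumed alias used throughout these examples

mention = {'created_at': 'Mon Apr 06 22:19:45 +0000 2020'}   # tweet that mentions the user
reply   = {'created_at': 'Mon Apr 06 22:27:10 +0000 2020'}   # the user's reply
delta = parsedate(reply['created_at']) - parsedate(mention['created_at'])
print(delta.total_seconds())          # 445.0
print(mean([delta.total_seconds()]))  # average over a single reply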
Example #2
 def parse_one_date_start_end(self, freetext_date, original_date):
     if self.is_decade(freetext_date):
         # special case like 1950s
         startdate = self.parse_decade(freetext_date)
         enddate = self.parse_decade(freetext_date, end=True)
     else:
         # default: all other cases
         try:
             startdate = parsedate(freetext_date,
                                   self.PARSERINFO,
                                   default=datetime.date(
                                       datetime.MINYEAR, 1, 1),
                                   dayfirst=True)
             ##            startdate, tokens = parsedate(freetext_date, self.PARSERINFO, default=datetime.date(datetime.MINYEAR, 1, 1), dayfirst=True, fuzzy_with_tokens = True)
             enddate = parsedate(freetext_date,
                                 self.PARSERINFO,
                                 default=datetime.date(
                                     datetime.MINYEAR, 12, 31),
                                 dayfirst=True)
         except (ValueError, TypeError):
             raise self.UnparsableDateWarning(original_date)
     logging.debug("parsed: %s  :  %s", startdate, enddate)
     ##            if self.list_has_alphanumeric(tokens):
     ##                logging.info("Could not parse tokens %s in original datestring %s", tokens, original_date)
     return startdate, enddate
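The two defaults above do the real work: dateutil fills any component missing from the free-text date from the default datetime, and when the default day overflows the parsed month it is clamped to the month's last day, so a January-1 default yields the earliest matching date and a December-31 default the latest. A small sketch with plain dateutil.parser.parse (no custom PARSERINFO):

import datetime
from dateutil.parser import parse as parsedate

text = 'November 1945'  # no day given
start = parsedate(text, default=datetime.datetime(datetime.MINYEAR, 1, 1))
end = parsedate(text, default=datetime.datetime(datetime.MINYEAR, 12, 31))
print(start.date(), end.date())  # 1945-11-01 1945-11-30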
Example #3
    def _parse_game(self, date_string, time_string, home_team, away_team,
                    location_string, game_id_string, playoffs, last_game):
        # Parse date and time into a datetime.
        date = parsedate(date_string).date()
        time = parsedate(time_string).time()
        game_time = datetime.datetime.combine(date, time)

        # Infer the year by assuming this game must be after the last game
        # and the first game should be within the next 3 months.
        year = last_game.time.year if last_game else datetime.date.today().year
        game_time = game_time.replace(year=year)
        if last_game:
            if game_time < last_game.time:
                game_time = game_time.replace(year=year + 1)
        else:
            if game_time > datetime.datetime.now() + datetime.timedelta(days=90):
                game_time = game_time.replace(year=year - 1)

        # Parse matchup.
        if home_team == self._team_name:
            opponent = away_team
            is_home = True
        else:
            opponent = home_team
            is_home = False

        return Game(game_time,
                    opponent,
                    is_home,
                    location_string,
                    game_id_string,
                    playoffs)
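The comment about inferring the year matters because the schedule strings carry no year, so the parsed date initially gets the current year; the roll-over then keeps games in chronological order. A rough sketch of that logic with hypothetical strings:

import datetime
from dateutil.parser import parse as parsedate

game_time = datetime.datetime.combine(parsedate('Mar 3').date(),
                                      parsedate('7:30 PM').time())
last_game_time = datetime.datetime(2023, 12, 28, 19, 0)  # hypothetical previous game
game_time = game_time.replace(year=last_game_time.year)
if game_time < last_game_time:  # March can't precede December of the same season
    game_time = game_time.replace(year=last_game_time.year + 1)
print(game_time)  # 2024-03-03 19:30:00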
Example #4
def update_events():
    # We're only concerned with events for the last thirty days
    # First, delete all events older than thirty days from the Event table
    thirty_days_ago = date.today() - timedelta(days=30)
    logger.debug('Deleting old events:')
    Event.objects.filter(start_time__lt=thirty_days_ago).delete()
    
    # Next, retrieve events from facebook until we run into the last event we know about
    latest_event = None
    try:
        latest_event = Event.objects.latest()
    except ObjectDoesNotExist:
        pass

    for edict in request_events():
        logger.debug('Got event info for %s (%s)', edict['name'], edict['start_time'])
        if parsedate(edict['start_time']).date() < thirty_days_ago:
            break
        if latest_event and edict['id'] == latest_event.fbid:
            break

        e, created = Event.objects.get_or_create(
            fbid=edict['id'],
            start_time = parsedate(edict['start_time']),
            end_time = parsedate(edict['end_time']),
            name = edict['name'],
            location = edict['location']
            )
        e.description = edict['description']

        vendor_src = (v.name for v in Vendor.objects.only('name').all())
        vendor_names = list(parse_event_vendors(edict, vendor_src))
        e.vendors = Vendor.objects.filter(name__in = vendor_names)
        e.save()
Example #5
File: cfg.py Project: xorbit/PiSurv
def is_recording_time():
    """Check whether it is a time span where we record"""
    # If no time span is specified, record continuously
    if not REC_TIME_SPAN:
        return True
    # Loop through all time spans until we find one that's active
    for ts in REC_TIME_SPAN:
        # Get the current time
        now = datetime.now()
        # We don't store the parsed objects but keep re-parsing them
        # because the parser's behavior depends on the current time
        # and date, allowing such things as day of the week to
        # be specified in the time specs.
        start = parsedate(ts['start'])
        stop = parsedate(ts['stop'])
        # Special case if start time is greater than stop time
        if start > stop:
            # Then we check if the current time is greater than the
            # start time, OR less than the end time.  Because of the
            # behavior of the dateutil parser, this makes it possible
            # to specify time spans that cross midnight.
            if now >= start or now < stop:
                return True
        else:
            # Normally, we're in the time span if now is between start
            # and stop time.
            if now >= start and now < stop:
                return True
    # We're not in any time span, don't record
    return False
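The comment about re-parsing holds because dateutil fills unspecified fields from the current date and time, so a time-only spec such as '22:00' comes back as a datetime on today's date, and a span whose start is later than its stop crosses midnight. A rough sketch with hypothetical time specs:

from datetime import datetime
from dateutil.parser import parse as parsedate

now = datetime.now()
start, stop = parsedate('22:00'), parsedate('06:00')  # both land on today's date
in_span = (now >= start or now < stop) if start > stop else (start <= now < stop)
print(start, stop, in_span)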
Example #6
def file_desc(file):
    locations = []
    for raw_location in db.session.query(FilesLocations).filter_by(
            file_sha256=file.sha256):
        for range in raw_location[4]:
            location = {
                "archive": raw_location[1],
                "suite": raw_location[2],
                "component": raw_location[3],
                "begin": parsedate(range[0]).strftime("%Y%m%dT%H%M%SZ"),
                "end": parsedate(range[-1]).strftime("%Y%m%dT%H%M%SZ"),
            }
            locations.append(location)
    desc = {
        "name": file.name,
        "path": file.path,
        "size": file.size,
        "locations": locations,

        # TEMP: for retro-compatibility, we keep those fields taken from
        # the first location
        # "archive_name": locations[0]["archive"] if locations else None,
        # "first_seen": locations[0]["timestamps"][0] if locations else None
    }
    return desc
Example #7
def parse_file(file_url, header_loc):
    # match 1/2/4 digits, a separator, an optional 1/2/4-digit group with optional separator, then 2 or 4 digits
    date_regex = r'((\d{4}|\d{2}|\d{1})(\.|\-|\/)(\d{4}|\d{2}|\d{1})?(\.|\-|\/)?(\d{4}|\d{2}))'

    if 'Demographics' in file_url:
        r = get('https://dshs.texas.gov/coronavirus/additionaldata/')
        soup = BeautifulSoup(r.text, 'lxml')
        parent = soup.find("a",
                           {"title": "Case and Fatality Demographics Data "})
        date_text = parent.nextSibling.nextSibling.text
        max_date = parsedate(re.search(date_regex, date_text).group(0))

    elif 'district-level' in file_url:
        # url updates weekly, if pandas can read and rows are approx expected, then file is updated
        try:
            df = pd.ExcelFile(file_url, engine='xlrd').parse(sheet_name=0,
                                                             header=header_loc)
            if len(df.index) > 1000:
                max_date = today.date()
        except:
            pass
    else:
        df = pd.ExcelFile(file_url, engine='xlrd').parse(sheet_name=0,
                                                         header=header_loc)
        df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
        date_text = list(df.columns)[-1]
        max_date = parsedate(re.search(date_regex, date_text).group(0)).date()
    return 1 if max_date == today.date() else 0
Example #8
		def get_settings(self, node):
			if node.find(ns + "title") is not None:
				self.title = node.find(ns + "title").text
			else:
				raise AttributeError("No title!")

			if node.find(ns + "id") is not None:
				self.id = node.find(ns + "id").text
			else:
				raise AttributeError("No id!")

			if node.find(ns + "opening-date") is not None:
				self.opening = parsedate(node.find(ns + "opening-date").text)
			else:
				self.opening = None

			if node.find(ns + "closing-date") is not None:
				self.closing = parsedate(node.find(ns + "closing-date").text)
			else:
				self.closing = None

			if node.find(ns + "template-file") is not None:
				self.template = node.find(ns + "template-file").text
			else:
				raise AttributeError("No template file!")
			
			if node.find(ns + "user-file") is not None:
				fn = "%s.users.xml" % self.id
				if os.path.exists(fn):
					self.users = etree.parse(fn)
				else:
					self.users = get_users_xml_from_csv(node.find(ns + "user-file").text)
			else:
				raise AttributeError("No users file!")
Example #9
    def parse_start_end(self, start, end, original_date):
        # parse the dates for start and end
        try:
            startdate = parsedate(start,
                                  self.PARSERINFO,
                                  default=datetime.date(
                                      datetime.MINYEAR, 1, 1),
                                  dayfirst=True)
##            startdate, tokens = parsedate(start, self.PARSERINFO, default=datetime.date(datetime.MINYEAR, 1, 1), dayfirst=True, fuzzy_with_tokens = True)
        except (ValueError, TypeError):
            raise self.UnparsableDateWarning(original_date)
        else:
            ##            if self.list_has_alphanumeric(tokens):
            ##                logging.info("Could not parse tokens %s in separated datestring %s, original datestring %s", tokens, start, original_date)
            try:
                enddate = parsedate(end,
                                    self.PARSERINFO,
                                    default=datetime.date(
                                        datetime.MINYEAR, 12, 31),
                                    dayfirst=True)
##               enddate, tokens = parsedate(end, self.PARSERINFO, default=datetime.date(datetime.MINYEAR, 12, 31), dayfirst=True, fuzzy_with_tokens = True)
            except (ValueError, TypeError):
                raise self.UnparsableDateWarning(original_date)
            else:
                ##                if self.list_has_alphanumeric(tokens):
                ##                    logging.info("Could not parse tokens %s in separated datestring %s, original datestring %s", tokens, end, original_date)
                # check whether the date(s) are exact, like "07/11/1945" or approximate, like "November 1945"
                tmp = parsedate(end,
                                self.PARSERINFO,
                                default=datetime.date(datetime.MINYEAR, 1, 1),
                                dayfirst=True)
                if tmp != enddate:
                    self.IS_APPROXIMATE = True
        return startdate, enddate
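The IS_APPROXIMATE check is the same default-date idea used the other way around: if re-parsing the end string with a January-1 default gives a different answer than the December-31 default did, some component (usually the day) was missing from the input, so the date is only approximate. A compressed sketch with a hypothetical helper:

import datetime
from dateutil.parser import parse as parsedate

def is_approximate(text):
    early = parsedate(text, default=datetime.datetime(datetime.MINYEAR, 1, 1))
    late = parsedate(text, default=datetime.datetime(datetime.MINYEAR, 12, 31))
    return early != late

print(is_approximate('07/11/1945'))     # False, fully specified
print(is_approximate('November 1945'))  # True, the day is missing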
Example #10
def check_file(file_name):
    """
    Check the given file for parse errors.
    :param file_name: file name to check
    :return: str error message.
    """
    if file_name.endswith('.md'):
        parser = parse_md_file
        required_fields = REQUIRED_FIELDS
    elif file_name.endswith('.yml'):
        parser = parse_yml_file
        required_fields = REQUIRED_YAML_FIELDS
    else:
        return 'Unknown file type: %s' % file_name

    try:
        data = parser(file_name)
    except Exception as e:
        return str(e)

    if HOF_FILENAME_RE.search(file_name):
        return check_hof_data(data)

    if 'mfsa_id' not in data:
        return 'The MFSA ID must be in the filename or metadata.'

    for field in required_fields:
        if field not in data:
            return 'The {0} field is required in the file metadata.'.format(
                field)

    for f in data['fixed_in']:
        if "ESR" in f and "ESR " not in f:
            return "When ESR is specified, it must be of the form 'Firefox ESR XX', not 'Firefox ESRXX' (Found '" + f + "')"

    if 'announced' in data:
        try:
            parsedate(data['announced']).date()
        except Exception:
            return 'Failed to parse "{}" as a date'.format(data['announced'])

    if file_name.endswith('.yml'):
        for cve, advisory in data['advisories'].items():
            if not CVE_RE.search(cve):
                return 'The cve field {0} does not appear to be valid.'.format(
                    cve)
            for field in REQUIRED_YAML_ADVISORY_FIELDS:
                if field not in advisory:
                    return 'The {0} field is required in the ' \
                           'file metadata for {1}.'.format(field, cve)
            if 'bugs' in advisory:
                for bug in advisory['bugs']:
                    if 'url' not in bug:
                        return 'There is a bug entry in {} without a "url" field.'.format(
                            cve)

    return None
Example #11
    def post(self):
        record = request.get_json(force=True)
        record['direction'] = int(record['direction'])
        r = redis.Redis(connection_pool=self.redis_pool)
        # TODO: verify timezone handling
        # TODO: use pub/sub to wrap the redis transaction and the postgres transaction?

        # begin redis transaction
        current_timestamp = parsedate(record['timestamp']).timestamp()
        previous_datetime = r.get('{}:timestamp'.format(record['dpu_id']))

        if previous_datetime:
            previous_timestamp = parsedate(previous_datetime).timestamp()
        else:
            previous_timestamp = current_timestamp

        transaction = r.pipeline()

        if previous_timestamp > current_timestamp:
            self._queue_reconciliation(record['dpu_id'], current_timestamp)

        spaces = self.dpu_spaces(record['dpu_id'])

        transaction.incr(record['dpu_id'])
        transaction.set('{}:timestamp'.format(record['dpu_id']),
                        record['timestamp'])
        record['count'], _ = transaction.execute()

        transaction = r.pipeline()
        for space in spaces:
            transaction.incr('{}:count'.format(space['space_id']),
                             space['direction'] * record['direction'])
        transaction.execute()

        # begin postgres transaction
        with self.pg.get_db_cursor(commit=True) as cursor:
            cursor.execute(DPU_LOG_SQL, record)
            cursor.execute(DPU_TELEM_SQL, record)

            # TODO: add handling for unmatched spaces
            for space in spaces:
                space_record = {
                    'recorded':
                    record['timestamp'],
                    'space_id':
                    space['space_id'],
                    'dpu_id':
                    record['dpu_id'],
                    'count':
                    record['count'] + space['direction'] * record['direction']
                }
                cursor.execute(SPACE_TELEM_SQL, space_record)
                print(cursor.query)
        return {'status': 'OK'}
Example #12
def initialize_database():
    """
    Initialize the database with test data. All existing data will be deleted.
    """
    stations = (("Oper", "Operngasse 1", "1010", "Wien", u"Österreich"),
                 ("Hohe Warte Stadium", "Heiligenstadt", "1190", "Wien", u"Österreich"),
                 ("Fliegerhorst Brumowski", "Brumowskigasse 23", "3425", "Tulln an der Donau", u"Österreich"),
                 ("FH Technikum Wien", "Höchstädtplatz 6", "1200", "Wien", u"Österreich"),
                 ("Red Bull Ring", u"Schloßweg 1", "8724", "Spielberg", u"Österreich"))
    cars = (("Citroen", u"C3", "silber", 5, 4.8, 50, "W-997G"),
             ("Ford", u"Focus", "rot", 5, 5.9, 70, "W-997GH"),
             ("Smart", u"ForTwo", "gelb", 2, 3.5, 70, "W-997GI"),
             ("VW", u"Käfer", "Rost", 4, 6.8, 40, "W 992223" ),
             ("Renault", "Grand Espace", "schwarz", 7, 8.8, 120, "K 009DF"),
             ("McLaren", "P1", "gelb", 2, 12.3, 190, "S 99823"))
    kunden = (("Alice Amber", "Nussdorfer Strasse 77", "1090", "Wien", u"Österreich"),
              ("Bob Builder", "Lederwaschstrasse 2", "5589", "Tamsweg", u"Österreich"),
              ("Istvan Nagy", "Halasz utca 25", "9400", "Sopron", u"Ungarn"),
              ("Ignaz Zurbgriggen", "Wildbachstrasse 9", "8340", "Hinwil", u"Schweiz"),
              ("Charly Custer", "Albrechtgasse 530", "3571", "Gars am Kamp", u"Österreich"),
              ("Eve Easter", "Kardinal Piffl Platz 2", "3400", "Klosterneuburg", u"Österreich"))

    db.drop_all()
    db.create_all()

    for c in cars:
        d = dict(zip( ('manufacturer', 'typ', 'color', 'seats', 'consumption', 'price', 'platenumber'), c))
        db.session.add(Car(**d))

    for s in stations:
        d = dict(zip( ('name', 'street', 'plz', 'city', 'country'), s))
        db.session.add(Station(**d))

    for k in kunden:
        d = dict(zip( ('name', 'street', 'plz', 'city', 'country'), k))
        db.session.add(Kunde(**d))

    db.session.flush()
    c3, focus, fortwo, kaefer, espace, p1 = Car.query.all()
    oper, hohewarte, lale, fh, rbr = Station.query.all()
    alice, bob, istvan, ignaz, charly, eve = Kunde.query.all()

    c3.station = hohewarte
    focus.station = lale
    espace.station = hohewarte

    Leihe(kunde=alice, car=c3, von=parsedate('2014-02-01 14:00'), bis=parsedate('2014-02-10 10:00'), returned=True, station_abhol=hohewarte, station_return=hohewarte)
    Leihe(kunde=alice, car=c3, von=parsedate('2014-02-14 08:00'), bis=parsedate('2014-02-15 22:00'), returned=True, station_abhol=hohewarte, station_return=lale)
    Leihe(kunde=ignaz, car=c3, von=datetime.today() - timedelta(days=2), bis=datetime.today() + timedelta(days=5), returned=False, station_abhol=hohewarte, station_return=rbr)
    Leihe(kunde=istvan, car=p1, von=parsedate('2014-01-14 09:00'), bis=datetime.today() + timedelta(days=2), returned=False, station_abhol=lale, station_return=fh)
    Leihe(kunde=charly, car=p1, von=datetime.today() + timedelta(days=10), bis=datetime.today() + timedelta(days=12), returned=False, station_abhol=lale, station_return=fh)

    db.session.commit()
Example #13
def CutSeconds(twts,seconds):
    start = parsedate(twts[0]['created_at'])
    startI = 0
    endI = len(twts) - 1
    for i, tw in enumerate(twts):
        if i == 0: continue
        end = parsedate(tw['created_at'])
        delta = end - start
        if delta.total_seconds() >= seconds:
            endI = i
            break
    return (startI, endI)
Example #14
 def _MX_RepTime(self,tw=None):
     if self._state == 'start':
         return
     if self._state == 'end':
         return
     if 'in_reply_to_status_id' not in tw or tw['in_reply_to_status_id'] is None: return "."
     end = parsedate(tw['created_at'])
     startw = self.__twts_dic.get(tw['in_reply_to_status_id'])
     if startw is None: return "."
     start = parsedate(startw['created_at'])
     dif = end - start
     return dif.total_seconds()
Example #15
 def download_eia_data(self,operation):
     """
         First check to see if file on server is newer than what we have on file system.
         If so, then download and process where necessary
     """
     #set initial timestamps in case of redirects
     file_time = parsedate('2000-01-01 00:00+00:00')
     url_time  = parsedate('2000-01-01 00:00+00:00')
     file_unix_time = file_time.strftime('%s')
     url_unix_time  = url_time.strftime('%s')
     
     
     if operation == 'reserves':
         url = self.EIA_DL_URL
         out = self.EIA_OUTFILE
     if operation == 'pricing':
         url = self.EIA_OIL_PRICE_URL
         out = self.EIA_OIL_PRICE_OUTFILE
         
     r = requests.head(url)#, allow_redirects=True)
     print(f'RET: {r}')
     if r.status_code == 301: #sometimes a permanent redirect is given. take header Location as url for Get
         url = r.headers['Location']
     else:
         url_time = r.headers['last-modified']
         url_date = parsedate(url_time)
         url_unix_time = url_date.strftime('%s')
         print(url_date.strftime('%s'))
         
     #check to see if file exists. If not, download
     if(os.path.isfile(out)):
         file_time = datetime.fromtimestamp(os.path.getmtime(out))
         file_unix_time = file_time.strftime('%s')
         print(f"Here: {file_time}")
     else:
         file_time = parsedate('2000-01-01 00:00+00:00')
         file_unix_time = file_time.strftime('%s')
         
     if (url_unix_time >= file_unix_time) or os.path.isfile(out) == False:
         response = requests.get(url)
         totalbits = 0
         if response.status_code == 200:
             with open(out, 'wb') as f:
                 for chunk in response.iter_content(chunk_size=1024):
                     if chunk:
                         totalbits += 1024
                         f.write(chunk)
         print(f"Downloaded {operation} file = ",totalbits*1025,"KB...")
         return(1)
     else:
         print("Local file is latest.")
         return(1)
Example #16
    def test_feedPubDateNotOverriddenByEpisode(self):
        self.fg.episodes[0].publication_date = \
            datetime.datetime(2015, 1, 1, 15, 0, tzinfo=pytz.utc)
        pubDate = self.fg._create_rss().find("channel").find("pubDate")
        # Now it uses the episode's published date
        assert pubDate is not None
        assert parsedate(pubDate.text) == self.fg.episodes[0].publication_date

        new_date = datetime.datetime(2016, 1, 2, 3, 4, tzinfo=pytz.utc)
        self.fg.publication_date = new_date
        pubDate = self.fg._create_rss().find("channel").find("pubDate")
        # Now it uses the custom-set date
        assert pubDate is not None
        assert parsedate(pubDate.text) == new_date
Example #17
    def test_feedPubDateNotOverriddenByEpisode(self):
        self.fg.episodes[0].publication_date = \
            datetime.datetime(2015, 1, 1, 15, 0, tzinfo=pytz.utc)
        pubDate = self.fg._create_rss().find("channel").find("pubDate")
        # Now it uses the episode's published date
        assert pubDate is not None
        assert parsedate(pubDate.text) == self.fg.episodes[0].publication_date

        new_date = datetime.datetime(2016, 1, 2, 3, 4, tzinfo=pytz.utc)
        self.fg.publication_date = new_date
        pubDate = self.fg._create_rss().find("channel").find("pubDate")
        # Now it uses the custom-set date
        assert pubDate is not None
        assert parsedate(pubDate.text) == new_date
Example #18
 def on_order(self, rr):
     tlogger.debug(rr)
     #rr = r['result']
     id = rr['clientOrderId']
     status = rr['status']
     rtype = rr['reportType']
     o = None
     try: o = self.orders[id]
     except KeyError: pass
     symbol = self.convert_symbol(rr['symbol'],0)
     side = rr['side']
     amount = float(rr['quantity'])
     filled = float(rr['cumQuantity'])
     remaining = amount-filled if status not in ('canceled','filled','suspended','expired') else 0
     if o is None:
         #(status == "new")
         #"2017-10-20T12:29:43.166Z"
         dto = parsedate(rr['createdAt']).replace(tzinfo=None)
         ts = timestamp_ms(dto)
         price = float(rr['price'])
         self.add_order(id=id, symbol=symbol, side=side, price=price, amount=amount, timestamp=ts,
                        remaining=remaining, filled=filled, params={'info': rr})
     else:
         # Can the server send "canceled"/replaced message twice, in response
         # to both cancelOrder/cancelReplaceOrder and subscribeReports?
         # Worse yet, the "expired" may arrive sooner than update,
         # thus have to check that remaining is smaller than previous, and filled larger
         remaining = min(remaining, o['remaining'])
         filled = max(filled, o['filled'])
         if status in ('canceled','filled','suspended','expired'):
             self.update_order(id, 0, filled, params={'info': rr})
             #self.log_error('received unregistered order {}'.format(id))
         elif status in ('partiallyFilled','new'):
             self.update_order(id, remaining, filled, params={'info': rr})
         else:
             #order 
             self.log_error('received unknown order status. r: {}'.format(rr))
     
     if rtype == 'trade':
         tid = rr['tradeId']
         tprice = float(rr['tradePrice'])
         tamount = float(rr['tradeQuantity'])
         tdto = parsedate(rr['updatedAt']).replace(tzinfo=None)
         tts = timestamp_ms(tdto)
         #fee is negative when we are paid the rebate
         #NB! fee is always in payout currency
         fee = float(rr['tradeFee'])
         self.add_fill(id=tid, symbol=symbol, side=side, price=tprice, amount=tamount,
                       timestamp=tts, order=id, fee=fee, params={'info': rr.copy()})
Example #19
def _mongo_kv(schema_field, value):
    """
    Returns a tuple of the MongoDB field name and validated value

    The the schema_field is multi_valued, value will always be returned as a list.

    :param schema_field dict: The schema for the field being evaluated, taken from
      DocStore schema.fields list
    :param value: Value to insert for this field. Value must be of the same type
      as specified in the schema_field, or the value must be able to be cast
      into that type.

    :return kv: Tuple of (mongo_field_name, mongo_value)
    :rtype tuple:
    """

    if value is None:
        return schema_field['mongo_field'], None

    field_type = schema_field['type']

    # Ensure multi_valued value is a list
    multi_valued = schema_field['multi_valued']
    if multi_valued and not isinstance(value, list):
        value = [value]

    if field_type == 'int':
        value = [int(v) for v in value] if multi_valued else int(value)
    elif field_type == 'boolean':
        value = [bool(v) for v in value] if multi_valued else bool(value)
    # Datetime should be inserted into MongoDB as python datetime objects
    elif field_type == 'datetime':
        if multi_valued:
            value = [v if isinstance(v, datetime.datetime) else parsedate(v)
                     for v in value]
        elif not isinstance(value, datetime.datetime):
            value = parsedate(value)
    # Location values should be inserted into MongoDB as two-element lists with x,y coordinates
    elif field_type == 'point':
        value = list(value)[:2]
    elif field_type == 'float':
        value = [float(v) for v in value] if multi_valued else float(value)

    # If multi_valued_type is "set", ensure no duplicates
    if multi_valued and \
            schema_field['multi_valued_operation'] == 'replace' and \
            schema_field['multi_valued_type'] == 'set':
        value = list(set(value))

    return schema_field['mongo_field'], value
Example #20
 def on_new_order(self, item):
     #["n", 148, 6083059, 1, "0.03000000", "2.00000000", "2018-09-08 04:54:09", "2.00000000", None]
     #["n", <currency pair id>, <order number>, <order type>, "<price>", "<amount>", "<date>", "<original amount ordered>" "<clientOrderId>"]
     _, pair_id, oid, otype, price, remaining, tstr, amount, clientOrderId = item[:
                                                                                  9]
     #Convert to string because api.create_order returns id as string
     oid = str(oid)
     symbol = self.id_to_symbol(pair_id)
     side = 'buy' if otype == 1 else 'sell'
     price = float(price)
     amount = float(amount)
     dto = parsedate(tstr)
     if dto.tzinfo is not None:
         logger2.error(
             'POLONIEX HAS CHANGED THEIR DATE FORMAT: {}, {}'.format(
                 tstr, dto))
     ts = timestamp_ms(dto.replace(tzinfo=None))
     remaining = float(remaining)
     #print('on_order:',oid,symbol,side,price,amount,ts,remaining,filled,payout)
     try:
         self.orders[oid]
     except KeyError:
         #set filled to 0 because filled (and payout) is updated by trades
         self.add_order(id=oid,
                        symbol=symbol,
                        side=side,
                        price=price,
                        amount=amount,
                        timestamp=ts,
                        remaining=remaining,
                        filled=0)
     else:
         self.update_order(id=oid, remaining=remaining)
Example #21
 def on_fill(self, item):
     #["t", 12345, "0.03000000", "0.50000000", "0.00250000", 0, 6083059, "0.00000375", "2018-09-08 05:54:09"]
     #['t', 9394539, '0.00057427', '0.00000476', '0.00000000', 0, 274547887461]
     #["t", <trade ID>, "<rate>", "<amount>", "<fee multiplier>", <funding type>, <order number>, <total fee>, <date>, "<clientOrderId>", "<trade total>"]
     #funding_type: 0 (exchange wallet), 1 (borrowed funds), 2 (margin funds), or 3 (lending funds).
     _, tid, price, amount, fee_rate, funding_type, oid = item[:7]
     tid, oid = str(tid), str(oid)
     price = float(price)
     amount = float(amount)
     fee_rate = float(fee_rate)
     total_fee = float(item[7]) if len(item) > 7 else None
     dto = parsedate(item[8]) if len(item) > 8 else dt.utcnow()
     if dto.tzinfo is not None:
         logger2.error(
             'POLONIEX HAS CHANGED THEIR DATE FORMAT: {}, {}'.format(
                 item[8], dto))
     ts = timestamp_ms(dto.replace(tzinfo=None))
     self.add_fill(id=tid,
                   symbol=None,
                   side=None,
                   price=price,
                   amount=amount,
                   fee_rate=fee_rate,
                   timestamp=ts,
                   order=oid)
Example #22
 def _parse(self, raw_tiddler):
     '''
     :type raw_tiddler: str
     :return: None
     '''
     DIVIDER = '\n\n'
     idx_header_end = raw_tiddler.index(DIVIDER)
     header_lines = raw_tiddler[:idx_header_end].splitlines()
     to_parse = {
         'created': False,
         'modified': False,
         'tags': False,
         'title': False,
         'type': False,
     }
     for line_ in header_lines:
         line = line_.strip()
         for field_name in to_parse:
             entry_marker = '%s:' % field_name
             if not line.startswith(entry_marker):
                 continue
             data = line.split(None, 1)[-1]
             if field_name in ('created', 'modified'):
                 setattr(self, field_name, parsedate(data))
             elif field_name == 'tags':
                 self.tags = [tag.strip() for tag in data.split(',')]
             elif field_name == 'title':
                 self.title = data
             elif field_name == 'type':
                 self.type = data
     self.content = raw_tiddler[idx_header_end + len(DIVIDER):]
Example #23
def download_to_file(url, file):
    frame_records = stack()[1]
    caller = getmodulename(frame_records[1]).upper()

    user_agent = {
        "User-agent":
        "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0"
    }
    try:
        r = requests.head(url, headers=user_agent)
        url_datetime = parsedate(r.headers['Last-Modified']).astimezone()
        file_datetime = datetime.datetime.fromtimestamp(
            path.getmtime(get_abspath_source_file(file))).astimezone()

        if (url_datetime > file_datetime):
            logging.info(
                '{} File on server is newer, so downloading update to {}'.
                format(caller, get_abspath_source_file(file)))
            actual_download_to_file(url, file, user_agent)
        else:
            logging.info(
                '{} File on server is older, nothing to do'.format(caller))
    except KeyError as exc:
        logging.warning(
            '{} KeyError in the headers. the {} header was not sent by server {}. Downloading file'
            .format(caller, str(exc), url))
        actual_download_to_file(url, file, user_agent)
    except FileNotFoundError as exc:
        logging.info("{} File didn't exist, so downloading {} from {}".format(
            caller, file, url))
        actual_download_to_file(url, file, user_agent)
    except Exception as exc:
        logging.warning('{} General exception occurred: {}.'.format(
            caller, str(exc)))
        actual_download_to_file(url, file, user_agent)
Example #24
def commit_datetimes_since(repository: Path,
                           since: date,
                           until_inclusive: date = None) -> List[datetime]:
    """
    Return all commit datetimes authored since given date.

    Parameters
    ----------
    repository : Path
    since : date
    until_inclusive : date, optional

    Returns
    -------
    author_date : List[datetime]
    """
    if until_inclusive is None:
        until_inclusive = date.today()
    return [
        parsedate(author_date) for author_date in subprocess.check_output(
            [
                'git', 'log', '--pretty=%aI', '--author=Gilles',
                f'--since={since.isoformat()}',
                f'--until={until_inclusive.isoformat()}'
            ],
            cwd=str(repository),
            universal_newlines=True).splitlines()
    ]
Example #25
def normalize(field_name, field_value, field_metadata):
    field_value = field_value.strip()
    field_type = field_metadata['type']
    try:
        if field_type == 'Date':
            return parsedate(field_value).strftime('%Y-%m-%d')
        elif field_type == 'Boolean':
            field_value = field_value.lower()
            if field_value in ('t', 'true', 'y', 'yes', '1'):
                return '1'
            elif field_value in ('f', 'false', 'n', 'no', '0'):
                return '0'
            else:
                return None
        elif field_type == 'Number':
            if field_metadata['numberType'] in ('integer', 'long'):
                return int(field_value)
            else:
                return float(field_value)
        elif field_type == 'StaticList':
            possible_values = field_metadata['values']
            if not possible_values:
                return field_value
            field_value = field_value.lower()
            for key, value in possible_values.items():
                if field_value == key.lower() or field_value == value.lower():
                    return key
            return None
        else:
            validationRegex = field_metadata['validationRegExp']
            if validationRegex and not re.fullmatch(validationRegex, field_value):
                return None
            return field_value
    except ValueError:
        return None
Example #26
    def get_build_depends_timestamps(self):
        """
            Returns a dict with keys Debian archives and
            values lists of tuple(timestamp, pkgs)
            where pkgs is a list of packages living there
        """
        required_timestamps = {}
        for pkg in self.buildinfo.get_build_depends():
            if not pkg.timestamp:
                self.get_bin_date(pkg)
            timestamp = parsedate(pkg.timestamp).strftime("%Y%m%dT%H%M%SZ")
            location = f"{pkg.archive_name}+{pkg.suite_name}+{pkg.component_name}"
            required_timestamps.setdefault(location,
                                           {}).setdefault(timestamp,
                                                          []).append(pkg)

            # We store timestamp value itself for the base mirror used for creating chroot
            self.buildinfo.required_timestamps.setdefault(location,
                                                          []).append(timestamp)

        location_required_timestamps = {}
        for location, timestamps in required_timestamps.items():
            # sort by the number of packages found there, convert to list of tuples
            timestamps = sorted(timestamps.items(),
                                key=lambda x: len(x[1]),
                                reverse=True)
            location_required_timestamps[location] = timestamps
        return location_required_timestamps
Example #27
    def _loadAndSendData(self, sock, filePath, metricName):
        """
    Returns the list of labels from the csv at filePath. Date and value
    fields are sent to the metric specified. As a side effect this
    creates the metric.

    :param sock: A connected socket object
    :param filePath: The csv with data to handle
    :param metricName: The target custom metric we will send data to
    """
        labels = []
        for (dttm, value, label) in self._loadDataGen(filePath):
            # Parse date string
            dttm = parsedate(dttm)
            # Convert to seconds since epoch (Graphite wants this)
            dttm = epochFromNaiveUTCDatetime(dttm)
            dttm = int(dttm)

            # Add data
            sock.sendall("%s %r %s\n" % (metricName, float(value), dttm))

            # Save the label for use later
            # Convert strings to appropriate numerical type
            try:
                labels.append(int(label))
            except ValueError:
                labels.append(float(label))

        self.gracefullyCloseSocket(sock)

        return labels
Example #28
 def sync_files(remote_url: str, local_path: pathlib.Path):
     ''' Sync remote url with local path, copy over modified time
     '''
     logging.info(f'Downloading: {remote_url}')
     try:
         session = _create_https_session()
         req = session.get(remote_url)
     except requests.exceptions.ContentDecodingError:
         logging.error(f"failed to decode {remote_url}")
         raise RuntimeError
     except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
         raise RuntimeError
     if req.status_code == 200:
         local_path.parent.mkdir(parents=True, exist_ok=True)
         if local_path.suffix.endswith('gz'):
             with gzip.open(local_path, 'wb') as f:
                 f.write(req.content)
         else:
             with open(local_path, 'wb') as f:
                 f.write(req.content)
         modified_time = parsedate(req.headers['Last-Modified']).timestamp()
         access_time = datetime.utcnow().timestamp()
         os.utime(local_path, (access_time, modified_time))
     else:
         raise RuntimeError(f'{req.status_code} returned')
Example #29
def from_date_string(ldate):
    # type: (str) -> date
    parsed = parsedate(ldate)
    # fix for python < 3.6
    if not parsed.tzinfo:
        parsed = parsed.replace(tzinfo=pytz.utc)
    return parsed.astimezone(pytz.utc).date()
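A quick usage note for the helper above, with made-up inputs: naive strings are assumed to already be UTC, aware ones are converted, and only the date part survives:

print(from_date_string('2021-03-02T23:30:00'))        # 2021-03-02 (treated as UTC)
print(from_date_string('2021-03-02T23:30:00-05:00'))  # 2021-03-03 (04:30 UTC the next day)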
Example #30
 def default(self, path, **kwargs):
     listing = self.listing()
     if type(listing) != list:  # cache is corrupt
         self.cache.delete("listing")
         listing = self.listing()
     listing.reverse()
     url_listing = [p["path"] for p in listing]
     if not "/" + path in url_listing:
         raise cherrypy.HTTPError(404, "File not found")
     post_index = url_listing.index("/" + path)
     prev_post = next_post = None
     if post_index > 0:
         prev_post = listing[post_index - 1]
     if post_index < len(listing) - 1:
         next_post = listing[post_index + 1]
     src = self.dropbox.read_file(path + ".md")
     mdown = get_markdown()
     html = mdown.convert(src)
     tpl_post = self.tpl.get_template("post.html")
     return tpl_post.render(body=html,
             reading_time=reading_time(src),
             page_title=mdown.Meta["title"][0],
             date=parsedate(mdown.Meta["date"][0]),
             prev_post=prev_post,
             next_post=next_post,
             todays_year=time.strftime("%Y"))
Example #31
 def default(self, path, **kwargs):
     listing = self.listing()
     if type(listing) != list:  # cache is corrupt
         self.cache.delete("listing")
         listing = self.listing()
     listing.reverse()
     url_listing = [p["path"] for p in listing]
     if not "/" + path in url_listing:
         raise cherrypy.HTTPError(404, "File not found")
     post_index = url_listing.index("/" + path)
     prev_post = next_post = None
     if post_index > 0:
         prev_post = listing[post_index - 1]
     if post_index < len(listing) - 1:
         next_post = listing[post_index + 1]
     src = self.dropbox.read_file(path + ".md")
     mdown = get_markdown()
     html = mdown.convert(src)
     tpl_post = self.tpl.get_template("post.html")
     return tpl_post.render(body=html,
                            reading_time=reading_time(src),
                            page_title=mdown.Meta["title"][0],
                            date=parsedate(mdown.Meta["date"][0]),
                            prev_post=prev_post,
                            next_post=next_post)
Example #32
def get_summaries(section, taxons, no_cache):
    """ defines and eventually gets the assembly summary files"""
    summaries = set()
    for url, dest_file in make_urls(section, taxons):
        r = requests.head(url)
        url_time = r.headers['last-modified']
        url_date = parsedate(url_time)
        download_file = True
        if os.path.exists(dest_file) and not no_cache:
            if url_date <= datetime.datetime.fromtimestamp(
                    os.path.getmtime(dest_file), tz=datetime.timezone.utc):
                download_file = False

        if download_file:
            logging.getLogger().info(
                f"Downloading {section} summary file from: {url}")
            if not os.path.exists(os.path.dirname(dest_file)):
                os.mkdir(os.path.dirname(dest_file))
            with open(dest_file, "w") as dfs:
                dfs.write(requests.get(url).text)
            os.utime(dest_file,
                     (int(url_date.timestamp()), int(url_date.timestamp())))
        else:
            logging.getLogger().info(f"Using cached file: {dest_file}")
        summaries.add(dest_file)

    return summaries
Example #33
    def group_events_by_day(events):
        d = defaultdict(list)
        for event in events:
            date = parsedate(event["date"])
            d[date.isoformat()].append(event)

        return d
Example #34
    def update_event(event):
        start = parsedate(event["date"] + " " + event["start"])
        end = parsedate(event["date"] + " " + event["end"])

        timezone = dateutil.tz.gettz("Australia/Melbourne")

        start = start.replace(tzinfo=timezone)
        end = end.replace(tzinfo=timezone)

        return {
            **event,
            **{
                "start_iso": start.isoformat(),
                "end_iso": end.isoformat()
            }
        }
Example #35
def from_utc_string(dt):
    # type: (str) -> datetime
    parsed = parsedate(dt)
    # fix for python < 3.6
    if not parsed.tzinfo:
        parsed = parsed.replace(tzinfo=pytz.utc)
    return parsed.astimezone(pytz.utc)
Example #36
def is_modified(url, dstFile):
    r = requests.head(url)
    url_time = r.headers['last-modified']
    url_date = parsedate(url_time)
    file_time = datetime.datetime.fromtimestamp(os.path.getmtime(dstFile))
    print(url_date.timestamp(), file_time.timestamp())
    return url_date.timestamp() > file_time.timestamp()
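is_modified above compares an aware Last-Modified datetime against a naive local mtime through .timestamp(), which works because .timestamp() interprets naive datetimes as local time. A variant that keeps both sides as aware UTC datetimes, for illustration only (the function name, URL and path are hypothetical):

import datetime, os
import requests
from dateutil.parser import parse as parsedate

def is_modified_utc(url, dst_file):
    # Last-Modified parses to an aware (UTC) datetime; make the mtime aware too
    url_date = parsedate(requests.head(url).headers['last-modified'])
    file_date = datetime.datetime.fromtimestamp(os.path.getmtime(dst_file),
                                                tz=datetime.timezone.utc)
    return url_date > file_date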
Example #37
def get_datetime(txt_record):
    datetime_string_matches = re.findall('^.*Last update (.*)\.', txt_record)
    date = ""
    if len(datetime_string_matches) > 0:
        date = datetime_string_matches[0]

    return parsedate(date)
Example #38
    def cp(self, link: str):
        ''' Copy file if it doesn't exist or if the remote modified time is newer than
        the current local version.
        '''
        logging.debug(f'Start copy on {link}')
        logging.debug(f'no parents: {self.no_parents}')
        if self.no_parents:
            local_link = link.replace(self.base_url, '')
            split_result = urlsplit(local_link)
        else:
            split_result = urlsplit(link)

        logging.debug(split_result)
        topdir = split_result.netloc.replace('.', '_')
        outpath = pathlib.Path(self.download_location, topdir,
                               *split_result.path.split('/'))
        logging.debug(f'Checking {link} against {outpath}')
        if not outpath.is_file():
            try:
                websync.sync_files(link, outpath)
            except RuntimeError:
                return None
            logging.debug(f"downloaded new file to {outpath}")
            return outpath
        elif self.update_existing:
            req = self.session.head(link)
            url_date = parsedate(req.headers['Last-Modified']).timestamp()
            if url_date > outpath.stat().st_mtime:
                websync.sync_files(link, outpath)
                logging.debug(f"downloaded newer file to {outpath}")
                return outpath
        return None
Example #39
  def _loadAndSendData(self, sock, filePath, metricName):
    """
    Returns the list of labels from the csv at filePath. Date and value
    fields are sent to the metric specified. As a side effect this
    creates the metric.

    :param sock: A connected socket object
    :param filePath: The csv with data to handle
    :param metricName: The target custom metric we will send data to
    """
    labels = []
    for (dttm, value, label) in self._loadDataGen(filePath):
      # Parse date string
      dttm = parsedate(dttm)
      # Convert to seconds since epoch (Graphite wants this)
      dttm = epochFromNaiveUTCDatetime(dttm)
      dttm = int(dttm)

      #LOGGER.info("{TAG:CLIENT.METRIC} metric=%s:%s:%s", metricName, dttm,
      #            value)

      # Add data
      sock.sendall("%s %r %s\n" % (metricName, float(value), dttm))

      # Save the label for use later
      # Convert strings to appropriate numerical type
      try:
        labels.append(int(label))
      except ValueError:
        labels.append(float(label))

    self.gracefullyCloseSocket(sock)

    return labels
Example #40
def grab(src, dest, name):
    src, dest, name = map(str, (src, dest, name))
    logger.info(f" src, dest, name --> {src} {dest} {name}")

    if not os.path.exists(dest):
        logger.info(f"Downloading {name}")
    elif not zipfile.is_zipfile(dest):
        logger.info(f"Downloading {name} (corrupt file)")
    elif not src.startswith("file://"):
        r = requests.head(src)
        file_time = datetime.datetime.fromtimestamp(os.path.getmtime(dest))
        url_time = file_time
        for _k in ['last-modified', 'Date']:
            if _k in r.headers:
                url_time = r.headers[_k]
        url_date = parsedate(url_time)
        utc = pytz.utc
        url_date = url_date.replace(tzinfo=utc)
        file_time = file_time.replace(tzinfo=utc)
        if url_date < file_time:
            # Do not download if older than the local one
            return
        logger.info(f"Downloading updated {name}")

    # Local file does not exist or remote one is newer
    if src.startswith("file://"):
        src2 = src.replace("file://", '')
        if not os.path.exists(src2):
            logger.info(f"Source location ({src2}) does not exist")
        else:
            logger.info(f"Copying local file from {src2}")
            shutil.copyfile(src2, dest)
    else:
        # urlretrieve(str(src), str(dest))
        # Streaming, so we can iterate over the response.
        r = requests.get(src, stream=True, timeout=10, verify=False)
        # Total size in bytes.
        total_size = int(r.headers.get('content-length', 0))
        logger.info(f"Requesting {src}")
        block_size = 1024
        wrote = 0
        with open("output.bin", 'wb') as f:
            for data in tqdm(r.iter_content(block_size),
                             total=math.ceil(total_size // block_size),
                             unit='KB',
                             unit_scale=False):
                wrote += len(data)
                f.write(data)
        logger.info(f" total_size [{total_size}] / wrote [{wrote}] ")
        if total_size != 0 and wrote != total_size:
            logger.error(
                f"ERROR, something went wrong. Data could not be written. Expected to write {wrote} but wrote {total_size} instead"
            )
        else:
            shutil.move("output.bin", dest)
        try:
            # Cleaning up
            os.remove("output.bin")
        except OSError:
            pass
Example #41
 def _parse_page(self, html):
     html = html.decode('ISO-8859-1', 'ignore')
     return [(
         r[3], parsedate('%s %s' % (r[0], r[1])), r[2]
     ) for r in re.findall(
         r'<event>.*?<date>(.*?)</date>.*?<time>(.*?)</time>.*?<location>(.*?)</location>.*?<description>(.*?)</description>.*?</event>',
         html, re.DOTALL)]
Example #42
def download_logs(urls):
    for url in urls:
        filename = wget.detect_filename(url)
        new_filename = "logs/" + data.filename_from_url(url)
        print "Checking log " + new_filename
        # If the path exists, check if there's a more up to date version and, if so, remove the old and download the new...
        if os.path.exists(new_filename):
            r = requests.head(url)
            if "last-modified" in r.headers:
                url_time = r.headers['last-modified']
                url_date = parsedate(url_time)
                file_time = datetime.datetime.fromtimestamp(os.path.getmtime(new_filename))
                url_date = url_date.replace(tzinfo=None)
                file_time = file_time.replace(tzinfo=None)
                pytz.UTC.localize(url_date)
                pytz.UTC.localize(file_time)
                if url_date > file_time:    
                    os.remove(new_filename)
                    download_log(url, new_filename)
            else:
                os.remove(new_filename)
                download_log(url, new_filename)
        # ... otherwise we just download it without any checks.
        else:
            download_log(url, new_filename)
Example #43
File: cfg.py Project: xorbit/PiSurv
def load_time_span(span_string):
    """Try to load a time span, skip invalid ones"""
    # Split into start and stop time string (separated by dash)
    m = re.match(r'^\s*(.+?)\s*-\s*(.+?)\s*$', span_string)
    # Quit if we didn't get a valid span
    if not m:
        print "Invalid time span: specify two time specs separated by '-'"
        return
    # Check that both time specs specify a valid time
    for i in range(1, 3):
        try:
            parsedate(m.group(i))
        except:
            print "Invalid time spec: %s" % m.group(i)
            return
    # Start and end time strings
    REC_TIME_SPAN.append({'start': m.group(1), 'stop': m.group(2)})
Example #44
def coerce_to_date(x):
    if isinstance(x, tuple):
        return datetime(*x)
    if isinstance(x, str):
        return parsedate(x)
    if isinstance(x, datetime):
        return x
    return None
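A short usage note for coerce_to_date, with illustrative inputs: tuples go straight to the datetime constructor, strings go through parsedate, datetimes pass through unchanged, and anything else comes back as None:

print(coerce_to_date((2021, 5, 17)))       # 2021-05-17 00:00:00
print(coerce_to_date('2021-05-17 14:00'))  # 2021-05-17 14:00:00
print(coerce_to_date(42))                  # None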
Example #45
    def load_group(self, slug):
        root = self._load_xml(slug)
        group_name = root.find('name').text
        group_url = root.find('url').text.strip()
        _dml = root.find('defaultmeetinglocation')

        schedule = None
        _schedule = root.find('schedule')

        default_location = None
        if _dml is not None:
            use_dml = _schedule.get('usedefaultmeetinglocation', '') == 'true'
            if use_dml:
                default_location = self._extract_location(_dml)

        tags = []
        _tags = root.find('tags')
        for _tag in _tags.findall('tag'):
            tags.append(_tag.text.lower())

        if _schedule is not None:
            ical_feed = _schedule.find('ical')
            if ical_feed is not None:
                schedule = IcalSchedule(ical_feed.text)
            else:
                meetings = []
                for meeting in _schedule.findall('meeting'):
                    time = parsedate(meeting.find('time').text).astimezone(utc)
                    name = meeting.find('name').text

                    description = None
                    _description = meeting.find('description')
                    if _description is not None:
                        description = self._normalize_description(
                            _description.text)

                    url = group_url
                    _url = meeting.find('url')
                    if _url is not None:
                        url = _url.text.strip()

                    location = self._extract_location(meeting.find('location'))
                    location = location or default_location
                    m = Meeting(name=name, time=time,
                                description=description,
                                url=url, location=location)

                    self.nominatim.resolve_coordinates(m)
                    meetings.append(m)

                schedule = StaticSchedule(meetings)

        thumbnail = self._get_thumbnail(slug)

        return UserGroup(slug=slug, name=group_name, schedule=schedule,
                         url=group_url, default_location=default_location,
                         tags=tags, thumbnail=thumbnail)
Example #46
 def _UI_TimeTweet(self,tw=None):
     if self._state == "start":
         self.__last_tweet = {us:None for us in self._usrset}
         return
     if self._state == "end":
         return
     if tw['user']['screen_name'] not in self._usrset: return "."
     oC = self.__last_tweet
     usr = tw['user']['screen_name']
     lTw = oC[usr]
     oC[usr] = tw
     if lTw is None: return "."
     if lTw['id'] == tw['id']: return "."
     prev = parsedate(lTw['created_at'])
     latt = parsedate(tw['created_at'])
     delta = latt - prev
     if delta.total_seconds() == 0 and tw['text'] == lTw['text']: return "."
     return delta.total_seconds()
Example #47
def check_file(file_name):
    """
    Check the given file for parse errors.
    :param file_name: file name to check
    :return: str error message.
    """
    if file_name.endswith('.md'):
        parser = parse_md_file
        required_fields = REQUIRED_FIELDS
    elif file_name.endswith('.yml'):
        parser = parse_yml_file
        required_fields = REQUIRED_YAML_FIELDS
    else:
        return 'Unknown file type: %s' % file_name

    try:
        data = parser(file_name)
    except Exception as e:
        return str(e)

    if HOF_FILENAME_RE.search(file_name):
        return check_hof_data(data)

    if 'mfsa_id' not in data:
        return 'The MFSA ID must be in the filename or metadata.'

    for field in required_fields:
        if field not in data:
            return 'The {0} field is required in the file metadata.'.format(field)

    if 'announced' in data:
        try:
            parsedate(data['announced']).date()
        except Exception:
            return 'Failed to parse "{}" as a date'.format(data['announced'])

    if file_name.endswith('.yml'):
        for cve, advisory in data['advisories'].items():
            for field in REQUIRED_YAML_ADVISORY_FIELDS:
                if field not in advisory:
                    return 'The {0} field is required in the ' \
                           'file metadata for {1}.'.format(field, cve)

    return None
Example #48
def get_time_between_tweets(twts,usrlst,condition=lambda x: True):
    tweetTimes = {usr:[] for usr in usrlst}
    lastTwts = {usr: None for usr in usrlst}
    usrset = set(usrlst)
    for tw in twts:
        if tw['user']['screen_name'] not in usrset: continue
        if not condition(tw): continue
        user = tw['user']['screen_name']
        lastTwt = lastTwts[user]
        lastTwts[user] = tw
        if lastTwt is None: continue
        oldDate = parsedate(lastTwt['created_at'])
        nwDate = parsedate(tw['created_at'])
        diff = nwDate - oldDate
        tweetTimes[user].append(diff.total_seconds())

    res = {usr:0 if len(tweetTimes[usr]) == 0 else sum(tweetTimes[usr])/len(tweetTimes[usr]) 
           for usr in usrlst}
    return res
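A minimal usage sketch for get_time_between_tweets, assuming tweets follow the Twitter API layout used above and that parsedate is dateutil.parser.parse (the screen names and timestamps are made up):
sample_twts = [
    {'user': {'screen_name': 'brand_a'},
     'created_at': 'Mon Jan 04 10:00:00 +0000 2021'},
    {'user': {'screen_name': 'brand_a'},
     'created_at': 'Mon Jan 04 10:30:00 +0000 2021'},
]
# brand_a tweeted twice, 30 minutes apart; brand_b never tweeted.
print(get_time_between_tweets(sample_twts, ['brand_a', 'brand_b']))
# -> {'brand_a': 1800.0, 'brand_b': 0}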
Example #49
0
 def _UI_TimeTweet(self,tw=None):
     if self._state == "start":
         self.__last_tweet = {us:None for us in self._usrset}
         self.__tweet_times = {us:[] for us in self._usrset}
         return
     if self._state == "end":
         oC = self.__tweet_times
         res = {us:(0 if len(oC[us]) == 0 else float(mean(oC[us]))) for us in self._usrset}
         return res
     if tw['user']['screen_name'] not in self._usrset: return
     oC = self.__last_tweet
     usr = tw['user']['screen_name']
     lTw = oC[usr]
     oC[usr] = tw
     if lTw is None: return
     prev = parsedate(lTw['created_at'])
     latt = parsedate(tw['created_at'])
     delta = latt - prev
     self.__tweet_times[usr].append(delta.total_seconds())
Example #50
0
def get_time_between_mentions(twts,usrlst):
    tweetTimes = {usr:[] for usr in usrlst}
    lastTwts = {usr: None for usr in usrlst}
    usrset = set(usrlst)
    for tw in twts:
        ments = set(usr['screen_name'] for usr in tw['entities']['user_mentions'])
        mentIusrs = ments.intersection(usrset)
        for cmaker in mentIusrs:
            lastTwt = lastTwts[cmaker]
            lastTwts[cmaker] = tw
            if lastTwt is None: continue
            oldDate = parsedate(lastTwt['created_at'])
            nwDate = parsedate(tw['created_at'])
            diff = nwDate - oldDate
            tweetTimes[cmaker].append(diff.total_seconds())
    res = {usr:0 if len(tweetTimes[usr]) == 0 else sum(tweetTimes[usr])/len(tweetTimes[usr]) 
           for usr in usrlst}
    return res
Example #51
0
    def __init__(self, assets, weights=None, date=None):
        """Initializes the given portfolio with a list of assets and
           optionally, a weights vector and a reference date."""

        # if assets are given inside a string, split them first
        if isinstance(assets, str):
            assets = assets.split()

        # cast assets to a list, whenever the object is iterable
        try:
            assets = [a for a in assets]
        except TypeError:
            raise TypeError("assets must to be an iterable object")

        assets_nr = len(assets)
        if not assets_nr > 0:
            raise ValueError("needed at least one asset, given {0}". \
                format(assets_nr))

        # if weights aren't given, assume it's equally weighted portfolio
        if weights is None:
            weights = [1.0/assets_nr  for _ in assets]

        # cast weights to a list, whenever the object is iterable
        try:
            weights = [w for w in weights]
        except TypeError:
            raise TypeError("weights must to be an iterable object")

        if assets_nr != len(weights):
            raise ValueError("expecting a weights vector of length {0}". \
                format(assets_nr))

        weights_sum = sum(weights)
        if weights_sum > 1.0:
            raise ValueError("weights cannot exceed 1.0, but their sum is {0}". \
                format(weights_sum))

        # if date isn't given, assume it is today
        if date is None:
            date = dt.datetime.combine(dt.date.today(), dt.time())

        # if date is a string, try to parse it
        if isinstance(date, str):
            date = parsedate(date)

        # check if the type of reference date is `datetime`
        if not isinstance(date, dt.datetime):
            raise TypeError("expecting a `datetime` instance for " +
                "the reference date")

        # initializes the internal data frame
        self._data = pd.DataFrame(np.array([weights]), columns=assets,
                index=[date])

# EOF
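A minimal usage sketch, assuming the enclosing class is named Portfolio (the snippet does not show the class name) and that dt, np, pd and parsedate are the usual datetime, numpy, pandas and dateutil.parser.parse imports:
p1 = Portfolio('AAPL MSFT GOOG')        # equally weighted, reference date defaults to today
p2 = Portfolio(['AAPL', 'MSFT'],
               weights=[0.6, 0.4],
               date='2020-01-02')       # reference date parsed from a string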
Example #52
0
    def _set_aws_auth(self):
        """
        _set_aws_auth gets key, secret, token and expiration either from a
        file or from a temporary instance and sets them
        """

        cred_tuple = get_aws_creds(self.run_local)
        self._aws_key = cred_tuple.access_key_id
        self._aws_secret = cred_tuple.secret_access_key
        self._aws_token = cred_tuple.token
        self._aws_token_expiry = parsedate(cred_tuple.expiration)
Example #53
0
 def _EG_MentionTime(self,tw=None):
     if self._state == "start":
         self.__last_mention = {us:None for us in self._usrset}
         self.__mention_times = {us:[] for us in self._usrset}
         return
     if self._state == "end":
         oC = self.__mention_times
         res = {us:float(sum(oC[us]))/len(oC[us]) if len(oC[us]) > 0 else 0 for us in self._usrset}
         return res
     mentioned = set([mn['screen_name'] for mn in tw['entities']['user_mentions']])
     carMakerMentions = self._usrset.intersection(mentioned)
     if len(carMakerMentions) == 0: return
     latter = parsedate(tw['created_at'])
     for cmaker in carMakerMentions:
         first = self.__last_mention[cmaker]
         self.__last_mention[cmaker] = tw
         if first is None: continue
         first = parsedate(first['created_at'])
         diff = latter - first
         self.__mention_times[cmaker].append(diff.total_seconds())
Example #54
0
 def _MX_TimeReplies(self,tw=None):
     if self._state == "start":
         self.__last_yesreply = {us:None for us in self._usrset}
         return
     if self._state == "end":
         return
     if tw['user']['screen_name'] not in self._usrset: return
     oC = self.__last_yesreply
     usr = tw['user']['screen_name']
     lTw = oC[usr]
     if (('quoted_status_id_str' in tw and tw['quoted_status_id_str'] is not None) or
         ('in_reply_to_screen_name' in tw and tw['in_reply_to_screen_name'] is not None) or
         ('in_reply_to_status_id' in tw and tw['in_reply_to_status_id'] is not None)):
         oC[usr] = tw
     if lTw is None: return "."
     if lTw['id'] == tw['id']: return "."
     prev = parsedate(lTw['created_at'])
     latt = parsedate(tw['created_at'])
     delta = latt - prev
     if delta.total_seconds() == 0 and tw['text'] == lTw['text']: return "."
     return delta.total_seconds()
Example #55
0
 def test_feedPubDateUsesNewestEpisode(self):
     self.fg.episodes[0].publication_date = \
         datetime.datetime(2015, 1, 1, 15, 0, tzinfo=pytz.utc)
     self.fg.episodes[1].publication_date = \
         datetime.datetime(2016, 1, 3, 12, 22, tzinfo=pytz.utc)
     self.fg.episodes[2].publication_date = \
         datetime.datetime(2014, 3, 2, 13, 11, tzinfo=pytz.utc)
     rss = self.fg._create_rss()
     pubDate = rss.find("channel").find("pubDate")
     assert pubDate is not None
     parsedPubDate = parsedate(pubDate.text)
     assert parsedPubDate == self.fg.episodes[1].publication_date
Example #56
0
 def _PT_Weekday(self,tw=None):
     if self._state == "start":
         #self.__dayRank = {e:i for i,e in enumerate([6,5,0,3,4,2,1])}
         rankWeek = [5, 6, 4, 3, 2, 1, 0]
         self.__dayRank = {e:int(i*3/len(rankWeek)) for i,e in enumerate(rankWeek)}
         return
     if self._state == "end":
         return
     if tw['user']['screen_name'] not in self._usrset: return "."
     twDate = parsedate(tw['created_at'])
     if twDate.weekday() >= 5: return 0 # 5 and 6 are Saturday and Sunday
     return 1
Example #57
0
 def _PT_PeakHour(self,tw=None):
     if self._state == 'start':
         self.__peakhr_tweets = {us:0 for us in self._usrset}
         self.___peakhrs = set([16,17,18,19])
         return
     if self._state == 'end':
         tC = self.__tweet_count
         fC = self.__peakhr_tweets
         return {us:float(fC[us])/tC[us] if tC[us] > 0 else 0 for us in self._usrset}
     if tw['user']['screen_name'] not in self._usrset: return
     if parsedate(tw['created_at']).time().hour in self.___peakhrs:
         us = tw['user']['screen_name']
         self.__peakhr_tweets[us] += 1
Example #58
0
 def _PT_Weekday(self,tw=None):
     if self._state == "start":
         self.__weekday_count = {us:0 for us in self._usrset}
         return
     if self._state == "end":
         oC = self.__weekday_count
         tC = self.__tweet_count
         res = {us:float(oC[us])/float(tC[us]) if tC[us] > 0 else 0 for us in self._usrset}
         return res
     if tw['user']['screen_name'] not in self._usrset: return
     twDate = parsedate(tw['created_at'])
     if twDate.weekday() >= 5: return # 5 and 6 are Saturday and Sunday
     self.__weekday_count[tw['user']['screen_name']] += 1
Example #59
0
    def _parse_game(self, date_string, time_string, matchup_string, location_string):
        # Parse date and time into a datetime.
        date = parsedate(date_string).date()
        time = parsedate(time_string).time()
        game_time = datetime.datetime.combine(date, time)

        # Parse matchup.
        # Playoffs start with "(Pla)", so have to pull that off the front.
        if matchup_string.startswith('(Pla) '):
            matchup_string = matchup_string[6:]
            playoffs = True
        else:
            playoffs = False
        teams = matchup_string.split(' at ')
        if teams[0] == self._team_name:
            opponent = teams[1]
            is_home = False
        else:
            opponent = teams[0]
            is_home = True

        return Game(game_time, opponent, is_home, location_string, playoffs=playoffs)
Example #60
0
 def _PT_PeakHour(self,tw=None):
     if self._state == 'start':
         hrRank = [9, 10, 8, 7, 11, 6, 5, 12, 4, 3, 2, 13, 1,
                   0, 14, 23, 15, 22, 21, 20, 19, 16, 18, 17]
         self.___hrsRanks = {e:int(i*2/len(hrRank)) for i,e in enumerate(hrRank)}
         #self.___peakhrs = set([15,16,17,18,19,20])
         self.___peakhrs = set(range(8,18))
         return
     if self._state == 'end':
         return
     if tw['user']['screen_name'] not in self._usrset: return "."
     #twTime = parsedate(tw['created_at']).time().hour
     hour = parsedate(tw['created_at']).time().hour
     return 1 if hour in self.___peakhrs else 0