Beispiel #1
0
def check_datetime(field):
    """Report whether ``field`` can be parsed as a date/time by dateutil.

    Double quotes are removed from ``field`` before it is parsed.

    :param field: text value, possibly containing double quotes
    :return: True if the cleaned value parses, False otherwise
    """
    candidate = field.replace('"', '')
    try:
        # ``parser.parse`` performs the parsing. ``parser.parser(...)`` would
        # only construct a parser object (treating its argument as the
        # parserinfo) and never inspect the text, so it could not fail.
        parser.parse(candidate)
    except Exception:
        return False
    return True
Beispiel #2
0
def date_offset(date):
	"""Check for invalid date formats, i.e. a missing year or a random string.

	:param date: date string to inspect
	:return: True only if the string parses and yields a year, a day and a
		month; False otherwise
	"""
	# Parse once and reuse the result instead of re-parsing per attribute.
	parsed = parser()._parse(date)[0]
	if parsed is None:
		return False
	return all(
		getattr(parsed, attr) is not None
		for attr in ("year", "day", "month")
	)
def valid_trans_dt(date):
    """
    Validate that a date field is a real calendar date in mmddyyyy format.

    The previous implementation called ``parser(date_string)``, which (when
    ``parser`` is the dateutil parser class) only builds a parser object and
    therefore accepted every input. The check is now done explicitly.

    :param date: incoming date field
    :return: True if the date is in mmddyyyy format else False
    """
    from datetime import datetime

    # Exactly eight digits are required; strptime alone would also accept
    # shorter, non-zero-padded month/day values.
    if len(date) != 8 or not date.isdigit():
        return False
    try:
        datetime.strptime(date, '%m%d%Y')
    except ValueError:
        return False
    return True
Beispiel #4
0
    def create_data(self):
        """Load the time/value series from this object's CSV file.

        Reads ``./data/<name>``, drops the first row (presumably a header),
        and reverses the order of the remaining rows.

        :return: tuple ``(time, exchange_rate)`` where ``time`` holds the
            parsed values of column 1 and ``exchange_rate`` the float values
            of column 2, both in reversed file order
        """
        path = "./data/" + str(self.__name)
        # The context manager closes the file even if reading fails.
        with open(path, 'r') as handle:
            rows = list(csv.reader(handle))[1:]
        rows.reverse()

        time = [parser.parse(row[1]) for row in rows]
        exchange_rate = [float(row[2]) for row in rows]

        return time, exchange_rate
Beispiel #5
0
def is_dateutil_result_obj_parsed(date_string):
    """Tell whether dateutil extracted at least one field from ``date_string``.

    :param date_string: text handed to dateutil's internal ``_parse``
    :return: False if nothing was parsed; otherwise True when at least one
        slot of the result object has a non-empty string form
    """
    # handle dateutil>=2.5 tuple result first
    try:
        parsed, _skipped = parser()._parse(date_string)
    except TypeError:
        parsed = parser()._parse(date_string)
    if not parsed:
        return False

    rendered = []
    for slot in parsed.__slots__:
        attr = getattr(parsed, slot)
        rendered.append('' if attr is None else str(attr))
    return any(rendered)
Beispiel #6
0
def is_dateutil_result_obj_parsed(date_string):
    """Check whether dateutil recognised any date/time component.

    :param date_string: text handed to dateutil's internal ``_parse``
    :return: True if any slot of the parse result renders to a non-empty
        string, False if there is no result or every slot is None
    """
    # handle dateutil>=2.5 tuple result first
    try:
        outcome, _ = parser()._parse(date_string)
    except TypeError:
        outcome = parser()._parse(date_string)

    if not outcome:
        return False

    def as_text(slot_name):
        slot_value = getattr(outcome, slot_name)
        if slot_value is None:
            return ''
        return str(slot_value)

    return any(list(map(as_text, outcome.__slots__)))
Beispiel #7
0
    def get_articles(self):
        """Build an ``Article`` for every entry of ``self._feed``.

        For each entry the ``published`` text is parsed into a date, the
        ``value`` of the first ``content`` item is used as body, and the
        ``description`` is passed through BeautifulSoup and
        ``remove_html_tags``. Missing pieces stay ``None``.

        :return: list of ``Article`` objects in feed order
        """
        collected = []
        for item in self._feed.entries:

            raw_published = item.get('published', None)
            when = parser().parse(raw_published) if raw_published else None

            content_blocks = item.get('content', None)
            text = content_blocks[0].get('value', None) if content_blocks else None

            summary = item.get('description', None)
            if summary:
                summary = remove_html_tags(
                    unicode(BeautifulSoup(summary, features='html')))

            article = Article(url=self._encode(item.get('link', None)),
                              title=self._encode(item.get('title', None)),
                              publish_date=when,
                              description=self._encode(summary),
                              body=self._encode(text))
            collected.append(article)
        return collected
Beispiel #8
0
def clean_file(r_file, w_file):
	"""Clean the Bio, State and Start_date columns of a CSV file.

	Reads ``r_file`` (test.csv), normalises each data row and writes the
	result to ``w_file`` (solution.csv). The header row, if present, gets an
	extra ``start_date_description`` column.

	:param r_file: path of the CSV file to read
	:param w_file: path of the CSV file to write
	"""
	# Context managers close both files even if a row fails to process.
	# NOTE(review): the input is opened in binary mode, which is what the
	# Python 2 csv module expects - confirm before running on Python 3.
	with open(r_file, 'rb') as test_csv, open(w_file, 'w') as solution_csv:
		reader = csv.reader(test_csv)
		writer = csv.writer(solution_csv)
		headers = next(reader, None)
		state_dict = create_state_dict()
		date_parser = parser()

		if headers:
			headers.append('start_date_description')
			writer.writerow(headers)

		for row in reader:
			# replace state abbrev with state name
			row[5] = state_name(row[5], state_dict)
			# normalize bio
			row[8] = strip_space(row[8])
			# convert date to ISO format if original date is a valid format
			if date_offset(row[10]):
				row[10] = date_parser.parse(row[10]).strftime('%Y-%m-%d')
			else:
				# append invalid date to adjacent column
				row = row[:11] + [row[10]]

			writer.writerow(row)
Beispiel #9
0
def execute_dag(dag, exec_date):
    """Create a scheduled DAG run for ``exec_date`` and fire its success callback.

    :param dag: DAG object; its ``_job_id_mapping`` is replaced with a
        ``MockIdMapping`` before the run is created
    :param exec_date: execution date as text, parsed with dateutil
    """
    dag._job_id_mapping = MockIdMapping()
    run_options = {
        'run_id': 'airflow_run_id_0001',
        'execution_date': parser().parse(exec_date),
        'state': 'scheduled',
    }
    created_run = dag.create_dagrun(**run_options)
    dag.handle_callback(created_run, success=True)
Beispiel #10
0
    def _parse_data(self, raw_data, var_filter, time_extents):
        """
        Group raw HADS observation lines by station code.

        Every non-empty line is split on "|" (the piece after the last "|"
        is discarded). Column 0 is the station code, column 2 the variable,
        column 3 the timestamp (parsed and stamped as UTC) and column 4 the
        value, where the text 'NaN' becomes ``npNan``.

        :param raw_data: text with one observation per line
        :param var_filter: container of variables to keep, or None for all
        :param time_extents: ``(begin_time, end_time)``; either may be None
            to leave that side unbounded (bounds are inclusive)
        :return: dict of station code -> [(variable, time, value), ...]
        """
        observations = defaultdict(list)
        date_parser = parser()

        begin_time, end_time = time_extents

        for record in raw_data.splitlines():
            if not record:
                continue

            columns = record.split("|")[:-1]
            if var_filter is not None and columns[2] not in var_filter:
                continue

            stamp = date_parser.parse(columns[3]).replace(tzinfo=pytz.utc)
            if begin_time is not None and not stamp >= begin_time:
                continue
            if end_time is not None and not stamp <= end_time:
                continue

            reading = npNan if columns[4] == 'NaN' else float(columns[4])
            observations[columns[0]].append((columns[2], stamp, reading))

        return dict(observations)
Beispiel #11
0
def index():
    """Render the index page with reformatted diary entry timestamps.

    Fetches the entries from ``DIARY_ALL_URL``, rewrites each entry's
    ``date_time`` in place as ``DD.MM HH:MM`` and renders ``index.html``.
    """
    payload = utils.send_http_request('get', DIARY_ALL_URL).json()
    entries = payload['entries']
    for item in entries:
        moment = parser().parse(item['date_time'])
        item['date_time'] = moment.strftime('%d.%m %H:%M')
    return render_template('index.html', entries=entries)
Beispiel #12
0
def get_obs(res, entity, source):
    """Yield an observable for every node linked to ``entity`` in ``res``.

    For each node of ``res['nodes']`` whose value differs from
    ``entity.value``, the first link of ``res['links']`` that references the
    node (as ``src`` or ``dst``) is selected. That link's history entries
    listing ``source`` are ordered by their parsed ``last_seen`` date, and
    the earliest one provides the label.

    :param res: mapping with ``nodes`` and ``links`` lists
    :param entity: object whose ``value`` identifies the central node
    :param source: source name a history entry must list to be considered
    :return: generator of observables whose ``link_label`` is
        ``"<description>:<last_seen>"``
    :raises IndexError: if the entity is not among the nodes, a node has no
        link, or a link has no history entry for ``source``
    """
    # NOTE: the matched node is not used below; the lookup is kept because it
    # raises IndexError when the entity is missing from the result.
    current_node = [n for n in res['nodes'] if n['value'] == entity.value][0]
    nodes = {n['_id']: n for n in res['nodes'] if n['value'] != entity.value}
    links = res['links']

    # First link touching each node, resolved up front so a node without any
    # link fails before the first observable is yielded.
    selected_nodes = {
        _id: [link for link in links
              if link['src']['id'] == _id or link['dst']['id'] == _id][0]
        for _id in nodes
    }

    for _id, link in selected_nodes.items():
        type_obs = nodes[_id]['_cls'].split('.')[1]
        obs = str_to_class(type_obs)(nodes[_id]['value'])
        # Sort on the parsed date. ``parser.parser(...)`` would only build
        # parser objects, which carry no date and cannot be ordered.
        history = sorted(
            (item for item in link['history'] if source in item['sources']),
            key=lambda item: parser.parse(item['last_seen']))

        obs.link_label = '%s:%s' % (history[0]['description'],
                                    history[0]['last_seen'])

        yield obs
Beispiel #13
0
def convert_to_datetime(input_date_str):
    """
    Converts a string representation of an isodate into a datetime instance.
    Handles the situation of the seconds containing decimal values
    Below are all valid string representation this function will convert:
     '2012-09-19T19:01:55.008000+00:00'
     '2012-09-19T19:01:55+00.00'
     '2012-09-19T19:01:55'

    The ISO parser is tried first, then dateutil as a fallback. A result
    without timezone information is stamped as UTC.

    @param input_date_str: date text; None, empty or blank yields None
    @return: timezone-aware datetime, or None for empty input
    @rtype: datetime.datetime
    @raise UnsupportedDateFormatException: if neither parser accepts the text
    """
    def ensure_tzinfo(value):
        if not value.tzinfo:
            return value.replace(tzinfo=tzutc())
        return value

    if not input_date_str or not input_date_str.strip():
        return None
    try:
        return ensure_tzinfo(isodate.parse_datetime(input_date_str))
    except Exception:
        try:
            # Fallback to a non iso format datetime parser
            from dateutil.parser import parser
            return ensure_tzinfo(parser().parse(input_date_str))
        except Exception:
            # Deliberately ignored here: the failure is logged and reported
            # just below. ``Exception`` (not a bare except) lets
            # KeyboardInterrupt/SystemExit propagate.
            pass
        _LOG.exception("Unable to parse date: %s" % (input_date_str))
        raise UnsupportedDateFormatException(input_date_str)
Beispiel #14
0
def datetime_string(string, custom_format=None):
	"""
	Takes a string and parses it into a datetime object with the dateutil module if present.
	If not it will fall back to a more rudimentary method of using strptime with a list of
	predefined formats with the option of passing in a custom format to try before the others.
	The first one to parse correctly gets returned.

	:param string: date/time text to parse
	:param custom_format: optional strptime format tried first (fallback path only)
	:return: the parsed datetime
	:raises ValueError: if no parser or format accepts the string
	"""
	try:
		# noinspection PyUnresolvedReferences
		from dateutil.parser import parser
	except ImportError:
		parser = None

	if parser is not None:
		return parser().parse(string)

	string = string.replace('/', '-')
	# Every entry needs its own trailing comma: adjacent string literals are
	# silently concatenated into one (unusable) format otherwise.
	formats = [
		'%Y',
		'%Y-%m',
		'%Y-%m-%d',
		'%Y-%m-%d %H',
		'%Y-%m-%d %I %p',
		'%Y-%m-%d %H:%M',
		'%Y-%m-%d %I:%M %p',
		'%Y-%m-%d %H:%M:%S',
		'%Y-%m-%d %I:%M:%S %p',
	]
	if custom_format:
		formats.insert(0, custom_format)
	for f in formats:
		try:
			return datetime.strptime(string, f)
		except ValueError:
			continue
	raise ValueError('The string did not match any configured format')
Beispiel #15
0
    def _parse_data(self, raw_data, var_filter, time_extents):
        """
        Turn raw HADS observation text into a per-station dict:
            station code -> [(variable, time, value), ...]

        Lines are "|"-separated; the trailing piece after the last "|" is
        dropped. Timestamps are parsed and marked as UTC, and the value text
        'NaN' maps to ``npNan``.

        :param raw_data: observation text, one record per line
        :param var_filter: variables to keep, or None to keep everything
        :param time_extents: ``(begin_time, end_time)`` inclusive bounds,
            each optional (None)
        """
        grouped = defaultdict(list)
        dt_parser = parser()

        begin_time, end_time = time_extents

        non_empty_lines = (ln for ln in raw_data.splitlines() if len(ln) != 0)
        for ln in non_empty_lines:
            parts = ln.split("|")[0:-1]
            if not (var_filter is None or parts[2] in var_filter):
                continue

            when = dt_parser.parse(parts[3]).replace(tzinfo=pytz.utc)
            if not (begin_time is None or when >= begin_time):
                continue
            if not (end_time is None or when <= end_time):
                continue

            if parts[4] != 'NaN':
                measured = float(parts[4])
            else:
                measured = npNan
            grouped[parts[0]].append((parts[2], when, measured))

        return dict(grouped)
Beispiel #16
0
    def run(self, is_all=False):
        """
        Fetch the entries of every registered feed and save them as articles.

        Each entry's page is downloaded, an ``Articles`` record is filled in
        and a thumbnail is chosen from the first image of the page that does
        not appear in the reference image list.

        :param is_all: when true the feed is fetched without the ``modified``
            argument; otherwise ``feed.get_max_timestamp()`` is passed as
            ``modified``
        :return:
        """

        for feed in self.feed_list:
            print(feed.get_max_timestamp())
            if is_all:
                feed_result = feedparser.parse(feed.url)
            else:
                feed_result = feedparser.parse(
                    feed.url, modified=feed.get_max_timestamp())
            #feed_result = feedparser.parse(feed.url)
            images_buffer = []
            first_article_buffer = None
            for i, entry in enumerate(feed_result.entries):
                print(entry.title)
                res = requests.get(entry.link)
                update_time_str = entry.updated
                new_article = Articles()
                new_article.url = res.url
                new_article.title = entry.title
                new_article.summary = entry.summary
                new_article.timestamp = parser().parse(timestr=update_time_str)
                new_article.source = feed

                soup = BeautifulSoup(res.content, "html.parser")
                # For the first article only the image list is kept; the
                # article itself is saved after the second one is processed.
                if i == 0:
                    images_buffer = self._get_all_image_urls(soup)
                    first_article_buffer = (soup, new_article)
                    continue

                try:
                    article_image_url_list = self._get_all_image_urls(soup)
                    for img_src in article_image_url_list:
                        if img_src not in images_buffer:
                            new_article.thumbnail = self._image_to_base64(
                                img_src, new_article.url)
                            break

                # NOTE(review): IndentationError is not something the calls
                # above would normally raise - presumably a placeholder for
                # the intended exception type; confirm.
                except IndentationError as ex:
                    print(ex)

                    pass
                new_article.save()

                # The images unique to the first article are only known once
                # the second article has been seen.
                # NOTE(review): the first article is saved only inside this
                # branch and only when such a unique image is found - confirm
                # that skipping it otherwise is intended.
                if i == 1:
                    images_buffer = self._get_all_image_urls(soup)
                    for img_src in self._get_all_image_urls(
                            first_article_buffer[0]):
                        if img_src not in images_buffer:
                            first_article_buffer[
                                1].thumbnail = self._image_to_base64(
                                    img_src, first_article_buffer[1].url)
                            first_article_buffer[1].save()
                            break
Beispiel #17
0
def read_and_preprocess_training_data(file_path):
    """Read the training CSV and split it into features and labels.

    :param file_path: path of a CSV file with ``speed`` and ``date`` columns
    :return: tuple ``(traits, labels)`` of numpy arrays; traits come from
        ``extract_date_and_time`` applied to each date, labels from ``speed``
    """
    frame = pd.read_csv(file_path)
    labels = frame.loc[:, 'speed']
    date_parser = parser()
    traits = [
        extract_date_and_time(raw_date, date_parser)
        for raw_date in frame.loc[:, 'date']
    ]
    return np.array(traits), np.array(labels)
Beispiel #18
0
    def parse_datetime(date_text):
        """Parse ``date_text``, treat it as UTC and convert it to Moscow time.

        Any timezone found in the text is overwritten with UTC before the
        conversion to ``Europe/Moscow``.
        """
        parsed = parser().parse(date_text)
        as_utc = parsed.replace(tzinfo=timezone('UTC'))
        moscow = timezone('Europe/Moscow')
        return as_utc.astimezone(tz=moscow)
Beispiel #19
0
 def __init__(self, path):
     """Open the index at ``path`` and prepare query, sort and parse helpers."""
     self.idxpath = path
     self.ix = open_dir(self.idxpath)

     # Query parser over the content and ctime fields, with date support.
     query_parser = MultifieldParser(['content', 'ctime'], schema=self.ix.schema)
     query_parser.add_plugin(DateParserPlugin())
     self.query = query_parser

     self.sorter = MultiFacet(["ctime", ScoreFacet()])
     self.parser = ttp.Parser()
     self.dateparser = parser.parser()
Beispiel #20
0
class DatetimeISOFormatJSONDecoder(JSONDecoder):
    """
    JSON decoder for a datetime given as an ISO 8601 formatted string.
    """
    # One dateutil parser instance shared by all decoders.
    _DATE_PARSER = parser()

    def decode(self, to_decode: str, **kwargs) -> datetime:
        """Parse ``to_decode`` with the shared parser; ``kwargs`` are ignored."""
        shared_parser = DatetimeISOFormatJSONDecoder._DATE_PARSER
        decoded = shared_parser.parse(to_decode)
        return decoded
Beispiel #21
0
def get_new_data():
    """Fetch recent uplink messages for every device and store new locations.

    The look-back window is the number of seconds since the last run plus
    one, or seven days when no previous run is recorded. For every device
    the stored uplink messages are requested, and each message whose
    ``received_at`` is not yet stored and whose coordinates pass the range
    check is saved as a ``Location``. Finally the last-run date is updated.

    A failure while processing one device is logged and does not stop the
    remaining devices.
    """
    last_seconds = seconds_from_last()
    if last_seconds:
        past_seconds = int(last_seconds) + 1
    else:
        past_seconds = 604800  # 7 days, max The Things Network storage allows

    for each_device in devices:
        # NOTE(review): "last" is appended with "?" instead of "&", so it ends
        # up inside the value of "type" - looks unintended; confirm against
        # the storage API before changing the URL.
        endpoint = "https://{cluster_loc}.cloud.thethings.network/api/v3/as/applications/{app}/devices/{dev}/packages/storage/uplink_message?order=-received_at&type=uplink_message?last={time}".format(
            cluster_loc=cluster,
            app=application,
            dev=each_device,
            time="{}s".format(past_seconds))
        logger.info(endpoint)
        key = 'Bearer {}'.format(app_key)
        headers = {'Accept': 'text/event-stream', 'Authorization': key}
        response = requests.get(endpoint, headers=headers)
        if response.status_code != 200:
            logger.info(response.reason)
        try:
            # The body is a sequence of JSON objects separated by blank
            # lines; wrap them into a single JSON array.
            response_format = "{\"data\": [" + response.text.replace(
                "\n\n", ",")[:-1] + "]}"
            response_data = json.loads(response_format)
            uplink_msg = response_data["data"]
            for each_resp in uplink_msg:
                result = each_resp["result"]
                uplink_message = result["uplink_message"]

                received = result["received_at"]
                lat = uplink_message["decoded_payload"].get("latitude", "")
                lon = uplink_message["decoded_payload"].get("longitude", "")
                alt = uplink_message["decoded_payload"].get("altitude", "")
                qos = uplink_message["decoded_payload"].get("hdop", "")
                end_device_ids = result["end_device_ids"]
                device = end_device_ids["device_id"]
                rawpay = uplink_message["frm_payload"]

                if (not Location.query.filter(
                        Location.datetime == received).first()
                        and -90 < float(lat) <= 90
                        and -120 <= float(lon) <= 80):
                    logger.info("{}, {}".format(lat, lon))
                    new_location = Location(
                        device_id=device,
                        raw=rawpay,
                        datetime_obj=parser().parse(received),
                        datetime=received,
                        latitude=lat,
                        longitude=lon,
                        altitude=alt,
                        hdop=qos)
                    db.session.add(new_location)
                    db.session.commit()
                    logger.info(new_location)
        except Exception:
            # Still best-effort (move on to the next device), but leave a
            # trace instead of hiding the failure; KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            logger.exception(
                "Failed to process uplink messages for device %s",
                each_device)

    set_date_now()
Beispiel #22
0
 def __init__(self, path):
     """Set up index access, query parsing, sorting and date parsing for ``path``."""
     self.idxpath = path
     self.ix = open_dir(self.idxpath)

     searched_fields = ['content', 'ctime']
     self.query = MultifieldParser(searched_fields, schema=self.ix.schema)
     date_plugin = DateParserPlugin()
     self.query.add_plugin(date_plugin)

     facets = ["ctime", ScoreFacet()]
     self.sorter = MultiFacet(facets)
     self.parser = ttp.Parser()
     self.dateparser = parser.parser()
    def _parse_datetime(dt_str: str) -> datetime:
        """
        Parse a datetime string with dateutil.

        Params:
            dt_str: String representing a datetime.

        Returns:
            Object representing the datetime contained in `dt_str`.
        """
        date_parser = parser()
        parsed = date_parser.parse(dt_str)
        return parsed
Beispiel #24
0
 def extract_fingerprint(self, name, tag="latest"):
     """Extract id and creation time from ``get_detail`` to spot a newer version.

     :param name: name passed to ``get_detail``
     :param tag: tag passed to ``get_detail``
     :return: ``(id, created)`` taken from the first history entry, with
         ``created`` as an integer from ``strftime('%s')``; ``(None, None)``
         when the detail is empty
     """
     detail = self.get_detail(name, tag)
     if len(detail) < 1:
         return None, None

     first_entry = json.loads(detail['history'][0]['v1Compatibility'])
     entry_id = first_entry['id']
     created_at = parser().parse(first_entry["created"])
     return entry_id, int(created_at.strftime('%s'))
Beispiel #25
0
def evaluate_data(field_type, value):
    """Convert ``value`` according to the (case-insensitive) ``field_type``.

    :param field_type: name of the target type; a falsy value returns
        ``value`` unchanged
    :param value: text to convert
    :return: the converted value
    :raises TypeError: if ``field_type`` is not one of the supported names
    """
    if not field_type:
        return value

    # bool, bin, int, float, complex, str, bytes, list, tuple, set, frozenset, dict
    kind = field_type.lower()

    if kind in ("int", "float", "bool"):
        # SECURITY NOTE(review): eval() runs ``value`` as Python source. If
        # ``value`` can come from an untrusted caller this allows arbitrary
        # code execution - consider an explicit conversion instead.
        expression = "{}({})".format(kind, value.capitalize())
        return eval(expression)

    if kind == "datetime":
        return parser().parse(value)

    if kind in ("date", "time", "year", "month", "day", "weekday", "hour"):
        # Some of these are methods (e.g. date()), others plain attributes.
        member = getattr(parser().parse(value), kind)
        if callable(member):
            return member()
        return member

    # custom data-type
    if kind == "sdate":
        return int(parser().parse(value).strftime("%y%m%d"))

    if kind == "oid":
        return ObjectId(value)

    raise TypeError('Given data type "%s" is not supported!' % kind)
Beispiel #26
0
def process(page):
	"""Convert the PARAM date of matching infobox templates on ``page``.

	For every template whose name is in INFOBOX_TITLES and that has PARAM,
	the parameter value is parsed as a date and replaced by a
	``{{start date}}`` template. Values that cannot be parsed, carry a
	non-UTC offset, or fall outside 1583-9999 are tagged instead. The page
	is saved after each change.
	"""
	contents = page.edit()
	wikicode = mwparserfromhell.parse(contents)
	for template in wikicode.filter_templates():
		if template.name.lower().strip() in INFOBOX_TITLES and template.has_param(PARAM):
			pub_date_stripped = template.get(PARAM).value.strip_code().strip() # This helps with parsing wikicode-ified dates
			pub_date_raw = template.get(PARAM).value.strip()
			# Skip values that already use a start-date template or were
			# already tagged for manual conversion.
			if pub_date_raw.lower().find("{{start") == -1 and pub_date_raw.find("[[Category:Infoboxes needing manual conversion") == -1:
				try:
					date = parser.parser().parse(pub_date_stripped,None)
				except ValueError:
					# If the date is ambiguous, e.g., "2-2-2012," tag it for manual conversion
					date = None

				if date is None or date.year is None:
					# Tag only once: the marker comment means a note was
					# already left on this value.
					if pub_date_raw.find("<!-- Date published") == -1:
						template.add(PARAM,pub_date_raw+" [[Category:Infoboxes needing manual conversion to use start date]]")
						page.save(unicode(wikicode),u'[[WP:BOT|Bot]]: Tagging unparsable {}'.format(PARAM))
						continue
					else:
						continue					

				if date.utcoffset() and date.utcoffset().total_seconds() != 0:
					# If the date has timezone info and the timezone isn't UTC, skip it 
					template.add(PARAM,pub_date_raw+" [[Category:Infoboxes needing manual timezone conversion to use start date]]")
					page.save(unicode(wikicode),u'[[WP:BOT|Bot]]: Tagging {} in need of manual conversion to use [[Template:Start date]]'.format(PARAM))
					continue

				if not (1583 <= date.year <= 9999): # {{start date}} is only for dates in the ISO 8601 date range
					template.add(PARAM,pub_date_raw+" <!-- Date should NOT be converted to use {{start date}}, since it is outside of the ISO 8601 date range -->")
					page.save(unicode(wikicode),u'[[WP:BOT|Bot]]: Tagging out-of-range {}'.format(PARAM))
					continue

				# "<day> <word> <year>" in the raw text selects day-first output (df=y).
				if re.search(r"""\d{1,2} [a-zA-Z]* \d{4}""",pub_date_raw,flags=re.U) is not None:
					df = True
				else:
					df = False
				startdate = mwparserfromhell.nodes.Template(name='start date')
				# Only non-zero components are added as positional parameters.
				if date.year:
					startdate.add(1,date.year)
				if date.month:
					startdate.add(2,date.month)
				if date.day:
					startdate.add(3,date.day)				
				if date.hour:
					startdate.add(4,date.hour)
				if date.minute:
					startdate.add(5,date.minute)
				if date.second:
					startdate.add(6,date.second)
				if df:
					startdate.add('df','y')
				template.add(PARAM,unicode(startdate)+"<!-- Bot-converted date -->")
				page.save(unicode(wikicode),u'[[WP:BOT|Bot]]: Converting '+PARAM+' to utilize {{[[Template:start date|]]}}')
			else:
				continue
Beispiel #27
0
def is_dateutil_result_obj_parsed(date_string):
    """Tell whether dateutil extracted at least one field from ``date_string``.

    :param date_string: text handed to dateutil's internal ``_parse``
    :return: False if nothing was parsed; otherwise True when at least one
        slot of the result object has a non-empty string form
    """
    res = parser()._parse(date_string)
    # dateutil>=2.5 returns a ``(result, skipped_tokens)`` tuple, older
    # releases the result object itself; a tuple has no ``__slots__``.
    if isinstance(res, tuple):
        res = res[0]
    if not res:
        return False

    def get_value(obj, key):
        value = getattr(obj, key)
        return str(value) if value is not None else ''

    return any(get_value(res, k) for k in res.__slots__)
Beispiel #28
0
def parse_or_none(datetimestr: str) -> Optional[str]:
    """Return the string itself if dateutil can parse it as a datetime.

    Args:
        datetimestr (str): A string to be checked

    Returns:
        Optional[str]:
            - str: ``datetimestr`` unchanged when it is parseable
            - None: when parsing raises ``ParserError``
    """
    try:
        parser().parse(datetimestr)
    except ParserError:
        return None
    else:
        return datetimestr
Beispiel #29
0
def is_dateutil_result_obj_parsed(date_string):
    """Check whether dateutil recognised any date/time component.

    :param date_string: text handed to dateutil's internal ``_parse``
    :return: True if any slot of the parse result renders to a non-empty
        string, False if there is no result or every slot is None
    """
    raw = parser()._parse(date_string)
    # Unwrap the ``(result, skipped_tokens)`` tuple of dateutil>=2.5; older
    # releases hand back the result object directly.
    res = raw[0] if isinstance(raw, tuple) else raw
    if not res:
        return False

    values = (getattr(res, k) for k in res.__slots__)
    return any(str(value) for value in values if value is not None)
    def testCustomParserInfo(self):
        """A parser built with a custom parserinfo honours its month names."""
        # Custom parser info wasn't working, as Michael Elsdörfer discovered.
        from dateutil.parser import parserinfo, parser

        class myparserinfo(parserinfo):
            # Same month table as the default, with the first entry renamed.
            MONTHS = [("Foo", "Foo")] + parserinfo.MONTHS[1:]

        custom_parser = parser(myparserinfo())
        parsed = custom_parser.parse("01/Foo/2007")
        expected = datetime(2007, 1, 1)
        self.assertEqual(parsed, expected)
Beispiel #31
0
def parse_datetime(msg):
    '''
    Convert an extracted textual date string into a formatted timestamp.

    The parser is tried first; if it fails, the string is split with a
    regular expression and each component (year, month, day, part of day,
    hour, minute, second) is recognised separately.

    :param msg: date text, possibly mixing Arabic numerals and Chinese
    :return: 'YYYY-MM-DD HH:MM:SS' string, or None for empty input
    '''
    # print('msg:', msg)
    if msg is None or len(msg) == 0:
        return None

    try:
        # parse the string into a datetime
        return parser(msg, fuzzy=True).strftime('%Y-%m-%d %H:%M:%S')
    except Exception:
        # Handles date strings that mix Arabic numerals with Chinese
        # characters; morning/noon/evening markers are also considered and
        # the output time format is adjusted accordingly.
        m = re.match(
            r"([0-9零一二两三四五六七八九十]+年)?([0-9一二两三四五六七八九十]+月)?([0-9一二两三四五六七八九十]+[号日])?([上中下午晚早]+)?([0-9零一二两三四五六七八九十百]+[点:.\时])?([0-9零一二三四五六七八九十百]+分?)?([0-9零一二三四五六七八九十百]+秒)?",
            msg)
    # print('m.group:', m.group(0), m.group(1), m.group(2), m.group(3), m.group(4), m.group(5))
    if m.group(0) is None:
        return None

    noon_marker = m.group(4)  # part-of-day marker
    pieces = {
        "year": m.group(1),
        "month": m.group(2),
        "day": m.group(3),
        "noon": noon_marker,
        "hour": '00' if m.group(5) is None else m.group(5),
        "minute": '00' if m.group(6) is None else m.group(6),
        "second": '00' if m.group(7) is None else m.group(7),
    }
    replacements = {}
    for field in pieces:
        text = pieces[field]
        if text is None or len(text) == 0:
            continue
        # The last character is the unit suffix and is dropped.
        if field == 'year':
            digits = year2dig(text[:-1])
        else:
            digits = cn2dig(text[:-1])
        if digits is not None:
            replacements[field] = int(digits)
    target_date = datetime.today().replace(**replacements)
    print('target_date:', target_date)
    if noon_marker is not None:
        if noon_marker in (u'下午', u'晚上', '中午'):
            hour = target_date.time().hour
            if hour < 12:
                target_date = target_date.replace(hour=hour + 12)
    return target_date.strftime('%Y-%m-%d %H:%M:%S')
Beispiel #32
0
 def characters(self, content):
     """Store ``content`` on the current key according to the active flag.

     The first set flag wins, in this order: key name, last-modified
     (parsed as a date), etag, size (as int), common prefix.
     """
     if self.flag_key:
         self.key.name = content
         return
     if self.flag_last_modified:
         self.key.last_modified = parser().parse(content)
         return
     if self.flag_etag:
         self.key.etag = content
         return
     if self.flag_size:
         self.key.size = int(content)
         return
     if self.flag_prefix:
         self.prefixes.append(CommonPrefix(content))
Beispiel #33
0
    def date(self):
        """Return the post date from metadata, or the file's mtime as fallback.

        When the metadata has no ``date`` and the status is ``published``,
        the fallback date is written to the metadata in ISO format and the
        post is saved.
        """
        from dateutil.parser import parser
        from datetime import datetime

        if self.meta.has_key("date"):
            # A parsed datetime is always truthy, so no further fallback.
            date = parser().parse(self.meta.date)
        else:
            post_path = os.path.join(settings.POST_ROOT, self.slug)
            date = datetime.fromtimestamp(os.stat(post_path).st_mtime)

        undated = not self.meta.has_key("date")
        if undated and self.meta.get("status", "*no-status*") == "published":
            self.meta.date = date.isoformat()
            self.save()
        return date
Beispiel #34
0
 def characters(self, content):
     """Store ``content`` on the current object according to the active flag.

     The first set flag wins, in this order: name, last-modified (parsed as
     a date), etag, size (as int), common prefix.
     """
     if self.flag_key:
         self.object.name = content
         return
     if self.flag_last_modified:
         self.object.last_modified = parser().parse(content)
         return
     if self.flag_etag:
         self.object.etag = content
         return
     if self.flag_size:
         self.object.size = int(content)
         return
     if self.flag_prefix:
         self.prefixes.append(CommonPrefix(content))
Beispiel #35
0
def parseXMLTimestamp(xsd_timestamp):
    """Parse a timestamp string into a naive datetime expressed in UTC.

    :param xsd_timestamp: timestamp text that must carry a UTC offset
    :return: datetime converted to UTC with ``tzinfo`` removed
    :raises error.PayloadError: if the timestamp has no time zone information
    """
    parsed = parser.parser().parse(xsd_timestamp)
    if parsed.utcoffset() is None:
        raise error.PayloadError('Timestamp has no time zone information')

    # convert to utc and remove tz info (internal use)
    in_utc = parsed.astimezone(tzutc())
    return in_utc.replace(tzinfo=None)
Beispiel #36
0
def parse_date_to_iso(date: str):
    """
    Parse a date string and render it in ISO format.

    A first attempt uses dateutil's defaults; if that raises ``ValueError``
    a second attempt is made with ``dayfirst=True``.

    :param date: date string to parse
    :return: parsed date in ISO format, or '' for empty or unparsable input
    """
    if not date:
        return ''

    try:
        return parser().parse(date).isoformat()
    except ValueError:
        # Retry reading the first number as the day.
        try:
            return parser().parse(date, dayfirst=True).isoformat()
        except ValueError:
            return ''
    except Exception:
        return ''
def timetoken(token):
    """Helper method used by __timesplit.

    :param token: a single text token
    :return: True if the token is a number or is recognised by dateutil's
        parser info as a date/time word, False otherwise
    """
    try:
        float(token)
        return True
    except ValueError:
        pass

    info = dparser.parser().info
    # These three lookups answer with a plain boolean.
    if info.jump(token) or info.pertain(token) or info.utczone(token):
        return True
    # These lookups return an index/offset, or None when the token is
    # unknown. Index 0 (e.g. Monday, "h", "am") is a valid hit, so the result
    # must be compared with None rather than tested for truthiness.
    lookups = (info.weekday, info.month, info.hms, info.ampm, info.tzoffset)
    return any(lookup(token) is not None for lookup in lookups)
Beispiel #38
0
def parseXMLTimestamp(xsd_timestamp):
    """Turn a timestamp string into a timezone-stripped UTC datetime.

    :param xsd_timestamp: timestamp text that must carry a UTC offset
    :return: naive datetime holding the UTC time
    :raises opennsa.error.PayloadError: if no time zone information is present
    """
    timestamp = parser.parser().parse(xsd_timestamp)

    has_offset = timestamp.utcoffset() is not None
    if not has_offset:
        # this needs to changed to valueerror...
        from opennsa import error
        raise error.PayloadError('Timestamp has no time zone information')

    # convert to utc and remove tz info (internal use)
    return timestamp.astimezone(tzutc()).replace(tzinfo=None)
 def get_date_sent(self, s):
     '''
     Returns the sent date found by a regex pattern, or "" if there is none.
     @param {String} s: input string to which the filter shall be applied
     '''
     date_pattern = re.compile('Date: (.*)\\r')
     found = date_pattern.findall(s)
     if not found:
         return ""
     # Only the first "Date:" occurrence is parsed.
     return parser().parse(found[0])
Beispiel #40
0
def _convert_created_at(dstr):
    """
    This function converts the string returned from a tweet's 'created_at' field to a datetime object. It assumes that
    all dstr are in UTC timezone (which they are according to the API).

    :param dstr: a tweet's 'created_at' string, ex. 'Wed Apr 22 03:39:06 +0000 2015'
    :return dt: datetime object
    """
    if isinstance(dstr, datetime.datetime):
        return dstr
    else:
        p = parser()
        return p.parse(dstr)
Beispiel #41
0
def read_test_and_predict(model, test_file_path):
    """Predict a value for every row of the test CSV.

    :param model: object with a ``predict`` method taking a numpy array
    :param test_file_path: path of a CSV file with ``id`` and ``date`` columns
    :return: list of ``[id, prediction]`` pairs in file order
    """
    frame = pd.read_csv(test_file_path)
    ids = frame.loc[:, 'id']
    dates = frame.loc[:, 'date']
    date_parser = parser()
    features = [
        np.array(extract_date_and_time(dates[row], date_parser))
        for row in range(len(dates))
    ]
    predictions = model.predict(np.array(features))
    return [
        [int(ids[row]), predictions[row]]
        for row in range(len(predictions))
    ]
Beispiel #42
0
def parseXMLTimestamp(xsd_timestamp):
    """Convert a timestamp string to UTC and drop its timezone marker.

    :param xsd_timestamp: timestamp text that must carry a UTC offset
    :return: naive datetime in UTC
    :raises opennsa.error.PayloadError: if the text has no time zone information
    """
    xsd_parser = parser.parser()
    moment = xsd_parser.parse(xsd_timestamp)

    if moment.utcoffset() is not None:
        # convert to utc and remove tz info (internal use)
        return moment.astimezone(tzutc()).replace(tzinfo=None)

    # this needs to changed to valueerror...
    from opennsa import error
    raise error.PayloadError('Timestamp has no time zone information')
Beispiel #43
0
def parse_info(when, now):
    """Report which date/time components dateutil found in ``when``.

    :param when: date/time text to inspect
    :param now: unused; kept for interface compatibility
    :return: dict with a boolean per component (``day``, ``month``,
        ``year``, ``hour``, ``minute``, ``second``, ``microsecond``) plus
        ``no_date``, which is True when day, month and year are all missing
    """
    when_info = parser.parser()._parse(when)
    # Newer dateutil releases return ``(result, skipped_tokens)``; without
    # unwrapping, no attribute is ever found and every flag stays False.
    if isinstance(when_info, tuple):
        when_info = when_info[0]

    components = ("day", "month", "year", "hour", "minute", "second",
                  "microsecond")
    info = {
        name: getattr(when_info, name, None) is not None
        for name in components
    }
    info["no_date"] = not info["day"] and not info["month"] and not info["year"]
    return info
    def _result_from_job_response(self, job_response):
        # type: (AcQuantumResultResponse) -> Result
        """Build a ``Result`` from the job response of this job.

        When the response holds exactly one result, its counts are converted
        to hex keys scaled by the shot number, and ``self._qobj`` and
        ``self._job_name`` are set from the experiment code.

        NOTE(review): with any other number of results the details stay
        empty and the ``finish_time`` lookup below raises KeyError - confirm
        whether that case can occur.
        """
        backend = self.backend()  # type: BaseBackend
        config = backend.configuration()  # type: BackendConfiguration
        experiment = self._api.get_experiment(int(self.job_id()))  # type: AcQuantumExperiment

        result_details = {}
        job_results = job_response.get_results()
        has_single_result = len(job_results) == 1
        if has_single_result:
            outcome = job_results[0]  # type: AcQuantumResult

            # Bit-string keys become hex keys; probabilities become counts.
            counts = {
                hex(int(bits, 2)): int(share * outcome.shots)
                for bits, share in outcome.data.items()
            }
            self._qobj = Qobj.from_dict(json.loads(experiment.code))
            self._job_name = self._qobj.experiments[0].header.name

            success = outcome.exception is None

            result_details = {
                "status": self._status.name,
                "success": success,
                "name": self._job_name,
                "seed": outcome.seed,
                "shots": outcome.shots,
                "data": {
                    "counts": counts
                },
                "start_time": outcome.start_time,
                "finish_time": outcome.finish_time,
                "header": self._qobj.experiments[0].header.as_dict()
            }

        from dateutil.parser import parser
        finished_at = parser().parse(result_details['finish_time'])

        return Result.from_dict({
            'results': [result_details],
            'backend_name': config.backend_name,
            'backend_version': config.backend_version,
            'qobj_id': self._qobj.qobj_id,
            'job_id': str(self.job_id()),
            'success': has_single_result,
            'header': {
                "backend_name": config.backend_name
            },
            "date": finished_at.isoformat()
        })
Beispiel #45
0
 def validate(self):
     """
     Check that ``self.str_date`` can be parsed.

     ``isoparse`` is used when ``self.iso_format`` is set, the general
     dateutil parser otherwise.

     :exception: Exception('Invalid date format', <original error>)
     :return: True when parsing succeeds
     """
     try:
         if not self.iso_format:
             parser().parse(self.str_date, None)
         else:
             isoparse(self.str_date)
     except Exception as err:
         raise Exception('Invalid date format', err)
     return True
Beispiel #46
0
def user(row):
    """Build a user ban record from one input row.

    Columns read from ``row``:
        0 -- account name
        1 -- ban length in days (converted with ``float``)
        2 -- date the ban started (normalised with ``parse_date_to_iso``)
        3 -- 'yes' (case-insensitive) when the user is banned
        4 -- reason for the ban
        5 -- who issued the ban

    :param row: sequence with at least six columns
    :return: dict with keys ``account``, ``is_banned``, ``banned_since``,
             ``banned_until``, ``banned_by`` and ``reason``; both date
             fields are empty strings when column 2 is empty
    :raises AssertionError: if the row has fewer than six columns
    """
    # Column 5 is read below, so six columns are required (the previous
    # check only demanded five and let short rows fail with IndexError).
    assert len(row) > 5, 'expected at least 6 columns'

    banned_since = parse_date_to_iso(row[2]) if row[2] else ''
    if banned_since:
        ban_end = parser().parse(banned_since) + timedelta(days=float(row[1]))
        banned_until = ban_end.isoformat()
    else:
        banned_until = ''

    return {
        'account': row[0] or '',
        'is_banned': row[3].lower() == 'yes',
        'banned_since': banned_since,
        'banned_until': banned_until,
        'banned_by': row[5],
        'reason': row[4],
    }
def synchronise_collection_timestamps(test_with_baton: TestWithBaton, collection: Collection):
    """
    Updates the timestamps on the given collection with those reported by a baton listing query for it.
    :param test_with_baton: framework to allow testing with baton
    :param collection: collection to synchronise timestamps for
    """
    runner = BatonRunner(test_with_baton.baton_location)
    encoded_collection = CollectionJSONEncoder().default(collection)
    listing = runner.run_baton_query(BatonBinary.BATON_LIST, ["--timestamp"], encoded_collection)
    timestamp_parser = parser()
    for entry in listing[0]["timestamps"]:
        # An entry without a "created" key is read as a modification timestamp
        if "created" not in entry:
            collection.last_modified = timestamp_parser.parse(entry["modified"])
        else:
            collection.created = timestamp_parser.parse(entry["created"])
def isSuspicious(api, spam_id):
    """Decide whether the account ``spam_id`` looks suspicious.

    The ``created_at`` value of the user's ``Status`` is parsed and compared
    with the current local date. If it is more than ``SUSPICIOUS_DAY`` days
    old the account is not considered suspicious. Otherwise the user is
    fetched a second time; the account is suspicious when that lookup
    succeeds.

    :param api: client object exposing ``GetUser(user_id=...)``
    :param spam_id: id of the user to inspect
    :return: True when the account is considered suspicious, else False
    """
    SUSPICIOUS_DAY = 7
    p = parser()
    user = api.GetUser(user_id=spam_id)
    status = user.Status
    creation = p.parse(status.created_at)
    current = time.localtime()
    # Rough age in days: every year counts as 365 days and every month as 30.
    dif = (current[0] - creation.year) * 365 + (current[1] - creation.month) * 30 + (current[2] - creation.day)
    if dif > SUSPICIOUS_DAY:
        return False
    try:
        api.GetUser(user_id=spam_id)
    except twitter.TwitterError:
        return False
    # Previously ``return true`` raised NameError here instead of returning.
    return True
def synchronise_data_object_timestamps(test_with_baton: TestWithBaton, data_object: DataObject):
    """
    Updates the timestamps on each replica of the given data object with those reported by a baton listing query.
    :param test_with_baton: framework to allow testing with baton
    :param data_object: data object whose replicas get their timestamps synchronised
    """
    runner = BatonRunner(test_with_baton.baton_location)
    encoded_data_object = DataObjectJSONEncoder().default(data_object)
    listing = runner.run_baton_query(BatonBinary.BATON_LIST, ["--timestamp"], encoded_data_object)
    timestamp_parser = parser()
    for entry in listing[0]["timestamps"]:
        # Each entry names the replica it belongs to under "replicates"
        target_replica = data_object.replicas.get_by_number(entry["replicates"])
        if "created" not in entry:
            target_replica.last_modified = timestamp_parser.parse(entry["modified"])
        else:
            target_replica.created = timestamp_parser.parse(entry["created"])
def test():
    """Check the record files named by BEFORE_OUTPUT_FILE and AFTER_OUTPUT_FILE.

    Each line of both files is decoded as JSON and its ``'t'`` field parsed
    as a timestamp. Every timestamp must be later than 1900-01-01 and each
    file must contain exactly ``NUM_RECORDS`` lines.
    """
    import json
    from datetime import datetime as dt
    from dateutil.parser import parser

    p = parser()
    for path in (BEFORE_OUTPUT_FILE, AFTER_OUTPUT_FILE):
        with open(path) as f:
            prev_ts = dt(year=1900, month=1, day=1)
            # Start at 0 so an empty file fails the count assertion below
            # instead of raising NameError on an unbound loop variable.
            count = 0
            for count, line in enumerate(f, start=1):
                record = json.loads(line)
                ts = p.parse(record['t'])
                # The comparison used to be a bare expression whose result
                # was discarded; assert it so the check actually runs.
                # NOTE(review): prev_ts is never advanced, so this only
                # checks against the 1900 floor, not ordering between
                # records -- confirm whether ordering was intended.
                assert ts > prev_ts
            assert count == NUM_RECORDS
Beispiel #51
0
    def testCustomParserShortDaynames(self):
        """Custom parserinfo day names shorter than three characters must parse.

        Horacio Hoyos reported that such names, for example two-letter
        German day abbreviations, did not work:
        https://github.com/dateutil/dateutil/issues/343
        """
        from dateutil.parser import parser, parserinfo

        class GermanParserInfo(parserinfo):
            # (abbreviation, full name) pairs, Monday first
            WEEKDAYS = list(zip(
                ("Mo", "Di", "Mi", "Do", "Fr", "Sa", "So"),
                ("Montag", "Dienstag", "Mittwoch", "Donnerstag",
                 "Freitag", "Samstag", "Sonntag"),
            ))

        german_parser = parser(GermanParserInfo())
        parsed = german_parser.parse("Sa 21. Jan 2017")
        self.assertEqual(parsed, datetime(2017, 1, 21))
def ConvertUTCDateTimeStringToLocalTimeString(utc_string):
  """Converts a UTC datetime string into a local-time string.

  If the input cannot be parsed, a warning is logged and the input is
  returned unchanged.

  Args:
    utc_string: The string representation of the UTC datetime.

  Returns:
    The input time expressed in the local timezone, formatted as
    '%Y-%m-%d %H:%M:%S %Z', or utc_string itself when parsing fails.
  """
  datetime_parser = parser.parser()
  try:
    parsed = datetime_parser.parse(utc_string)
  except ValueError:
    log.warn('Failed to parse UTC string %s', utc_string)
    return utc_string
  except OverflowError:
    log.warn('Parsed UTC date exceeds largest valid C integer on this system')
    return utc_string
  local = parsed.astimezone(tz.tzlocal())
  return local.strftime('%Y-%m-%d %H:%M:%S %Z')
Beispiel #53
0
 def __setattr__(self, key, value):
     """Validate ``value`` against the field definition titled ``key`` and store it.

     The definition is looked up in ``self.FIELD_DEFS`` by ``title``. When
     the field allows ``datetime.datetime`` and the value is an integer or a
     string, the value is first converted: integers via
     ``datetime.datetime.fromtimestamp`` and strings by parsing them as a
     date. The (possibly converted) value must then pass ``__validate`` and,
     when the definition has ``choices``, ``__checkchoices``.

     Nothing is stored when no field definition matches ``key``.

     :raises ValueError: if the value fails type validation or is not one
                         of the allowed choices
     """
     field_def = next((x for x in self.FIELD_DEFS if x.title == key), False)
     if field_def:
         if type(value) in [str, unicode, int, long] and datetime.datetime in field_def.allowed_types:
             if type(value) in [int, long]:
                 value = datetime.datetime.fromtimestamp(value)
             elif type(value) in [str, unicode]:
                 # ``parse`` turns the string into a datetime; the previous
                 # ``date_parser.parser(value)`` only built a parser object.
                 # NOTE(review): assumes date_parser is the dateutil.parser
                 # module -- confirm against the file's imports.
                 value = date_parser.parse(value)
             else:
                 raise ValueError
         if self.__validate(value, field_def.allowed_types):
             if 'choices' in field_def:
                 if self.__checkchoices(value, field_def.choices):
                     self.__add_field(field_def, value)
                 else:
                     e = "{0} is not present in choices {1}".format(value, field_def.choices)
                     raise ValueError(e)
             else:
                 self.__add_field(field_def, value)
         else:
             e = "{0} does not validate against allowed_types {1}"\
                 .format(value, field_def.allowed_types)
             raise ValueError(e)
Beispiel #54
0
def convertToMilitaryTime(class_time):
    """Convert a class time range such as "9:00-10:15am" to 24-hour integers.

    The range is split on '-'. When the start time has no am/pm suffix one
    is inferred from its hour: 9-11 is taken as "am", 1-6 as "pm", and any
    other hour borrows the last two characters of ``class_time``. Each part
    is then turned into an integer like 900 or 1315; "pm" times that do not
    start with "12" get 1200 added.

    :param class_time: time range string, "TBA", or the empty string
    :return: ``{"start": ..., "end": ...}``; both values are "N/A" for
             "TBA" or an empty string, otherwise the converted integers
    :raises IndexError: if ``class_time`` contains no '-' separator
    :raises ValueError: if a part that looks like a time is not numeric
    """
    if class_time == "TBA" or class_time == "":
        return {"start": "N/A", "end": "N/A"}
    parts = class_time.split('-')

    # Infer am/pm for the start time from its hour. A non-numeric hour is
    # left as it is, as before.
    try:
        start_hour = int(parts[0].split(':')[0])
    except ValueError:
        start_hour = None
    # Skip the inference when the start already carries a suffix; adding a
    # second one used to make the conversion below fail.
    if start_hour is not None and parts[0][-2:] not in ("am", "pm"):
        if 9 <= start_hour < 12:
            parts[0] += "am"
        elif 1 <= start_hour <= 6:
            parts[0] += "pm"
        else:
            parts[0] += class_time[-2:]

    converted = []
    # A part that matches neither branch repeats the previous value
    # (None for the first part); this mirrors the original behaviour.
    militaryTime = None
    for part in parts:
        if part[-2:] == "am" or part[:2] == "12":
            militaryTime = int(part[:-2].replace(":", ""))
        elif part[-2:] == "pm":
            militaryTime = int(part[:-2].replace(":", "")) + 1200
        converted.append(militaryTime)
    return {"start": converted[0], "end": converted[1]}
Beispiel #55
0
    def testParserParseStr(self):
        """Parsing ``self.str_str`` and ``self.uni_str`` must give equal results."""
        from dateutil.parser import parser

        from_str = parser().parse(self.str_str)
        from_uni = parser().parse(self.uni_str)
        self.assertEqual(from_str, from_uni)
Beispiel #56
0
    try:
        if not arg:
            return arg
        return _dtparser.parse(arg, dayfirst=dayfirst)
    except Exception:
        if errors == 'raise':
            raise
        return arg


class DateParseError(ValueError):
    """ValueError subclass for date parsing failures.

    NOTE(review): presumably raised by the date-parsing helpers in this
    module; the raising sites are not visible here.
    """


# Module-level parser instance, created once at import time.
# NOTE(review): `parser` is presumably dateutil's parser class -- confirm
# against the imports at the top of the file.
_dtparser = parser()


# patterns for quarters like '4Q2005', '05Q1'
# qpat1full: one digit, 'Q', four digits   (e.g. '4Q2005')
# qpat2full: four digits, 'Q', one digit   (e.g. '2005Q4')
# qpat1:     one digit, 'Q', two digits    (e.g. '4Q05')
# qpat2:     two digits, 'Q', one digit    (e.g. '05Q1')
qpat1full = re.compile(r'(\d)Q(\d\d\d\d)')
qpat2full = re.compile(r'(\d\d\d\d)Q(\d)')
qpat1 = re.compile(r'(\d)Q(\d\d)')
qpat2 = re.compile(r'(\d\d)Q(\d)')


def parse_time_string(arg, freq=None):
    """
    Try hard to parse datetime string, leveraging dateutil plus some extra
    goodies like quarter recognition.

    Parameters
Beispiel #57
0
def get_schedule_tally(username, total_interval, override_date=None):
    """
    Main entry point.

    Walk back ``total_interval`` days from the reference date (``override_date``
    when given, otherwise the current UTC time) and, for every day, pair each
    patient scheduled for ``username`` with the visit found for that day.

    A visit is the ``fields`` dict of the first submission by ``username`` for
    the case on that day (with ``has_pillbox_check`` set to 'Yes' or 'No'),
    otherwise the ``fields`` of the first submission by anyone, otherwise None.

    returns (tally, patient_case_ids, total_scheduled (int), total_visited (int))
    tally = [(visit_date, [(patient_info, visit), (patient_info, None), ...]), ...]

    NOTE(review): ``patient_case_ids`` is rebound for every day, so the value
    returned is the set for the last day processed (or the full schedule set
    when ``total_interval`` is 0) -- confirm that this is intended.
    """
    if override_date is not None:
        nowdate = override_date
        chw_schedule = CHWPatientSchedule.get_schedule(username, override_date=nowdate)
    else:
        nowdate = datetime.utcnow()
        chw_schedule = CHWPatientSchedule.get_schedule(username)

    patient_case_ids = {entry['case_id'] for entry in chw_schedule.raw_schedule}
    patient_cache = get_patient_display_cache(list(patient_case_ids))

    # one (date, [(patient_info, visit), ...]) tuple per day walked
    tally = []
    total_scheduled = 0
    total_visited = 0

    for days_back in range(total_interval):
        visit_date = nowdate - timedelta(days=days_back)
        patient_case_ids = {
            case_id for case_id in chw_schedule.scheduled_for_date(visit_date)
            if case_id is not None
        }
        dereferenced_patient_info = [patient_cache.get(case_id, {}) for case_id in patient_case_ids]
        visited = []

        dp = parser()
        for case_id in patient_case_ids:
            total_scheduled += 1
            own_hits = dots_submissions_by_case(case_id, visit_date, username=username)['hits']['hits']

            if len(own_hits) == 0:
                # nothing from this chw; accept a submission from anyone on this day
                any_hits = dots_submissions_by_case(case_id, visit_date)['hits']['hits']
                if len(any_hits) > 0:
                    visited.append(any_hits[0]['fields'])
                    total_visited += 1
                else:
                    visited.append(None)
                continue

            # work out whether the pillbox was checked on the encounter date
            fields = own_hits[0]['fields']
            pillbox_check_str = fields['pillbox_check']
            if len(pillbox_check_str) > 0:
                anchor_date = dp.parse(json.loads(pillbox_check_str).get('anchor'))
            else:
                anchor_date = datetime.min
            encounter_date = dp.parse(fields['encounter_date'])
            if anchor_date.date() == encounter_date.date():
                fields['has_pillbox_check'] = 'Yes'
            else:
                fields['has_pillbox_check'] = 'No'

            visited.append(fields)
            total_visited += 1

        tally.append((visit_date, list(zip(dereferenced_patient_info, visited))))
    return tally, patient_case_ids, total_scheduled, total_visited
Beispiel #58
0
def parse_date(string):
    """Build a condition string on ``txn.posted_date`` from a date phrase.

    The phrase is split on the keywords on/in/between/from/and/to/since,
    each piece is run through dateutil's fuzzy parser, and the pieces are
    turned into ``txn.posted_date`` comparisons that are concatenated and
    returned as one string.

    NOTE(review): the code assigns to and None-checks ``year``/``month``/
    ``day`` on the parse result, which presumes a mutable result object with
    optional fields -- confirm this matches the dateutil version in use.
    """
    query_date=[]
    response=[]
    label=""
    string=string.replace("go to","")
    # The separator group is capturing, so the keywords themselves are kept
    # as their own list elements and can be recognised further down.
    substr = re.split('( on | in | between | from | and | to | since )',string)
    for substring in substr:
        substring = "".join(" " if c in ('!','.',':',',','-') else c for c in substring)    #ignore punctuation

        subdate=list(parser.parser().parse(substring, None, fuzzy_with_tokens=True))    #parse date

        # Relative words override or fill in parts of the parsed date.
        today=date.today()
        if "today" in substring:
            subdate[0]=today
        if "yesterday" in substring:
            subdate[0]=today-timedelta(1)
        if "this" in substring:
            if "year" in substring:
                subdate[0].year=today.year
            if "month" in substring:
                subdate[0].year=today.year
                subdate[0].month=today.month
            elif subdate[0].year is None and subdate[0].month is not None:
                subdate[0].year=today.year
        if "last" in substring:
            if "year" in substring:
                subdate[0].year=today.year-1
            if "month" in substring:
                subdate[0].year=today.year
                subdate[0].month=today.month-1
                # NOTE(review): this compares the whole result object with 0;
                # it looks like subdate[0].month was meant (January wrapping
                # to December of the previous year) -- confirm.
                if subdate[0]==0:
                    subdate[0].year=today.year-1
                    subdate[0].month=12
            elif subdate[0].year is None and subdate[0].month is not None:
                subdate[0].year=today.year-1
        # A month or day without a year defaults to the current year.
        if subdate[0].year is None and (subdate[0].month is not None or subdate[0].day is not None):
            subdate[0].year=today.year

        # Keyword pieces open a lower or upper bound; `label` remembers which
        # bound the next date piece completes.
        if substring==' from ' or substring==" between " or substring==" since ":
            query_date.append("txn.posted_date>='")
            label="between"
        # NOTE(review): " until " and " till " are not in the split pattern
        # above, so those two comparisons look unlikely to match -- confirm.
        elif substring==' to ' or (substring==" and " and label=="between") or substring==" until " or substring==" till ":
            query_date.append("txn.posted_date<='")
            label="and"

        if subdate[0].year is not None or subdate[0].month is not None or subdate[0].day is not None:
            if label=="between":
                # Lower bound: missing month/day default to the first one.
                if subdate[0].month is None:
                    subdate[0].month=1
                    subdate[0].day=1
                elif subdate[0].day is None:
                    subdate[0].day=1
                norm_date = "-".join([str(subdate[0].year),str(subdate[0].month),str(subdate[0].day)])
                query_date.append(norm_date)
                query_date.append("'")
            elif label=="and":
                # Upper bound: missing month/day default to the last one.
                if subdate[0].month is None:
                    subdate[0].month=12
                    subdate[0].day=calendar.monthrange(subdate[0].year,subdate[0].month)[1]
                elif subdate[0].day is None:
                    subdate[0].day=calendar.monthrange(subdate[0].year,subdate[0].month)[1]
                norm_date = "-".join([str(subdate[0].year),str(subdate[0].month),str(subdate[0].day)])
                query_date.append(norm_date)
                query_date.append("'")
            else:
                # No open bound: a lone date becomes a range or an equality.
                if subdate[0].month is None:
                    norm_date_1 = "-".join([str(subdate[0].year),"01","01"])
                    # NOTE(review): a whole-year range ends on 12-30 here,
                    # which leaves out 31 December -- confirm intent.
                    norm_date_2 = "-".join([str(subdate[0].year),"12","30"])
                    norm_date = "' and '".join([norm_date_1,norm_date_2])
                    query_date.append("txn.posted_date between '")
                    query_date.append(norm_date)
                    query_date.append("'")
                elif subdate[0].day is None:
                    norm_date_1 = "-".join([str(subdate[0].year),str(subdate[0].month),"01"])
                    norm_date_2 = "-".join([str(subdate[0].year),str(subdate[0].month),str(calendar.monthrange(subdate[0].year,subdate[0].month)[1])])
                    norm_date = "' and '".join([norm_date_1,norm_date_2])
                    query_date.append("txn.posted_date between '")
                    query_date.append(norm_date)
                    query_date.append("'")
                else:
                    norm_date = "-".join([str(subdate[0].year),str(subdate[0].month),str(subdate[0].day)])
                    query_date.append("date(txn.posted_date)='")
                    query_date.append(norm_date)
                    query_date.append("'")

    response="".join(query_date) 
    # A closing quote directly followed by the next condition means two
    # conditions were emitted back to back; join them with " and ".
    response=response.replace("'txn.posted_date","' and txn.posted_date")
    return response
Beispiel #59
0
def extract_from_excel(file_path):
    """Read a .csv or .xls file into a list of row dictionaries.

    The first row supplies the field names. Every following row becomes a
    dict keyed by those names plus a ``ROW_NUM`` entry holding the row's
    1-based position in the file (the first data row is 2).

    In CSV files, non-empty values of columns that ``field_type_dict`` marks
    as ``"DateTimeField"`` are parsed into datetimes. In XLS files, date
    cells become ``datetime.date`` objects and whole-number numeric cells
    become ``int``.

    :param file_path: path of the file; the extension must be .csv or .xls
    :return: list of dicts, one per data row
    :raises NameError: if the file does not exist in ``default_storage`` or
                       has an unsupported extension
    """
    if not default_storage.exists(file_path):
        raise NameError("%s is not a valid file." % file_path)

    file_ext = (file_path[-4:]).lower()
    if file_ext != ".csv" and file_ext != ".xls":
        raise NameError("%s is not a valid file type (should be either .csv or .xls)." % file_path)

    fields = []
    data_list = []

    if file_ext == ".csv":
        import csv
        import dateutil.parser as dparser

        normalize_newline(file_path)
        data = csv.reader(default_storage.open(file_path, "rU"))

        # read the column header; the next() builtin works on both
        # Python 2 and 3 readers, unlike the .next() method
        fields = [smart_str(field) for field in next(data)]

        # the header is row 1, so data rows are numbered from 2
        for row_num, row in enumerate(data, start=2):
            item = dict(zip(fields, row))
            for key, value in list(item.items()):
                if key in field_type_dict and field_type_dict[key] == "DateTimeField":
                    # dparser.parse() returns the parsed datetime; the
                    # previous dparser.parser(...) call stored a parser
                    # object instead. Blank cells are left as they are.
                    if value:
                        item[key] = dparser.parse(value)
            item["ROW_NUM"] = row_num
            data_list.append(item)
    else:
        # NOTE(review): the workbook is opened straight from file_path, not
        # through default_storage -- confirm that this is intended.
        book = xlrd.open_workbook(file_path)
        nsheets = book.nsheets
        # the row count of the first sheet is applied to every sheet
        nrows = book.sheet_by_index(0).nrows

        # get the fields from the first row
        for i in range(0, nsheets):
            sh = book.sheet_by_index(i)
            for c in range(0, sh.ncols):
                col_item = sh.cell_value(rowx=0, colx=c)
                fields.append(smart_str(col_item))

        # get the data - skip the first row
        for r in range(1, nrows):
            row = []
            for i in range(0, nsheets):
                sh = book.sheet_by_index(i)
                for c in range(0, sh.ncols):
                    cell = sh.cell(r, c)
                    cell_value = cell.value
                    if cell.ctype == xlrd.XL_CELL_DATE:
                        date_tuple = xlrd.xldate_as_tuple(cell_value, book.datemode)
                        cell_value = datetime.date(date_tuple[0], date_tuple[1], date_tuple[2])
                    elif cell.ctype in (2, 3) and int(cell_value) == cell_value:
                        # so for zipcode 77079,
                        # we don't end up with 77079.0
                        cell_value = int(cell_value)
                    row.append(cell_value)

            item = dict(zip(fields, row))
            item["ROW_NUM"] = r + 1
            data_list.append(item)

    return data_list