def __init__(self, file_handle):
    """Create parser for open file.

    -- file_handle: a file object to read from. The file must be CSV and
       have a 'time' column in ISO date format.
    """
    # auto detect the dialect from an initial sample
    #dialect = csv.Sniffer().sniff(file_handle.read(1000))
    #file_handle.seek(0)
    csv.DictReader.__init__(self, file_handle)#, dialect=dialect)
    #super(TimeParser, self).__init__(file_handle)#, dialect=dialect)
    self.current_row = dict((k.lower().strip(), v)
                            for k, v in self.next().iteritems())
    self.next_row = dict((k.lower().strip(), v)
                         for k, v in self.next().iteritems())
    self.current_time = dateparse(self.current_row['time'])
    self.next_time = dateparse(self.next_row['time'])
    # we need the list of cameras that have image columns here
    self.cameras = []
    # we need to filter down based on cameras...
    self.camera_sensors = {}
    # but pose is generic... so no need to list things
    self.pose_sensors = []
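# Note: throughout these snippets, `dateparse` is assumed to be dateutil's
# parser, imported roughly as below (an assumption; the imports are not shown
# in the originals). A minimal sketch of the flexible parsing these functions
# rely on:
from dateutil.parser import parse as dateparse

print(dateparse('2013-12-15T13:31:21.000+0000'))  # ISO 8601 timestamp, tz-aware
print(dateparse('15 Dec 2013', fuzzy=True))       # free-form text, fuzzy parsing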
def main(args):
    ''' Generates statistics on all repositories '''
    start = dateparse(args.start_date)
    end = dateparse(args.end_date)
    log.info("Initializing %s", args.filename)
    with open(args.filename, 'w') as csvfile:
        csvwriter = csv.writer(csvfile, delimiter=',', quotechar="'",
                               quoting=csv.QUOTE_MINIMAL)
        csvwriter.writerow([
            'repo', 'issues created', 'issues closed',
            'pull requests opened', 'pull requests closed',
            'comments created', 'commits', 'releases'])
        for repo in REPOS:
            log.info("Getting report statistics on %s", repo)
            stats = (
                repo,
                count_issues_created(repo, start, end),
                count_issues_closed(repo, start, end),
                count_pull_requests_opened(repo, start, end),
                count_pull_requests_closed(repo, start, end),
                count_comments_created(repo, start, end),
                count_commits(repo, start, end),
                count_releases(repo, start, end)
            )
            csvwriter.writerow([str(i) for i in stats])
def skede_from_skededict(data_dict, filing_number, header_row_id, is_amended, cd):
    data_dict['transaction_id'] = data_dict['transaction_id'][:20]
    data_dict['header_id'] = header_row_id
    data_dict['superceded_by_amendment'] = is_amended
    data_dict['filing_number'] = filing_number

    ## The switch from v.8 to v.8.1 added a 'dissemination date' though it kept
    ## the expenditure date. We now prefer the dissemination date, but fall back
    ## to the expenditure date if it's not available. The spec says that not
    ## having either is an error, so...
    datefound = False
    try:
        data_dict['expenditure_date_formatted'] = dateparse(data_dict['dissemination_date'])
        datefound = True
    except (ValueError, KeyError):
        pass
    if not datefound:
        try:
            data_dict['expenditure_date_formatted'] = dateparse(data_dict['expenditure_date'])
            datefound = True
        except (ValueError, KeyError):
            pass

    data_dict['expenditure_amount'] = validate_decimal(data_dict['expenditure_amount'])
    data_dict['calendar_y_t_d_per_election_office'] = validate_decimal(
        data_dict['calendar_y_t_d_per_election_office'])

    #model_instance = SkedE()
    cd.writerow('E', data_dict)
def parse(root, UNITS): value = root.find("./pod[@id='Result']").find('subpod').find('plaintext').text print value if value.startswith('~~ '): value = value.strip('~~ ') m = __number_re.search(value) if m: QUANTITY = float(m.group(1)) UNIT = m.group(2).lower() if "trillion" in UNIT: QUANTITY *= pow(10, 12) elif "billion" in UNIT: QUANTITY *= pow(10, 9) elif "million" in UNIT: QUANTITY *= pow(10, 6) elif "thousand" in UNIT: QUANTITY *= pow(10, 3) elif "date" in UNITS: try: print "F**K YOU 2" dt = dateparse(str(int(QUANTITY))) QUANTITY = (dt - datetime.datetime(1970, 1, 1)).total_seconds() except Exception as e: raise NameError("Exception") if not UNITS: if "$" in value: UNIT = "dollars" else: UNIT = UNITS else: # check if it is a date try: print value if len(value) == 4: epoch = datetime(1970, 1, 1) t = datetime(int(value), 1, 1) diff = t-epoch QUANTITY = diff.total_seconds() print QUANTITY else: print "Not 4 chars" print value dt = dateparse(value) QUANTITY = (dt - datetime.datetime(1970, 1, 1)).total_seconds() UNIT = "date" except: raise NameError('Could not parse!') print QUANTITY return (QUANTITY, UNIT)
def moin2atomentries(wikibase, outputdir, rewrite, pattern): wikibase_len = len(rewrite) if pattern: pattern = re.compile(pattern) #print (wikibase, outputdir, rewrite) req = urllib2.Request(wikibase, headers={'Accept': RDF_IMT}) with closing(urllib2.urlopen(req)) as resp: feed = bindery.parse(resp) for item in feed.RDF.channel.items.Seq.li: uri = split_fragment(item.resource)[0] relative = uri[wikibase_len:] print >> sys.stderr, uri, relative if pattern and not pattern.match(relative): continue if rewrite: uri = uri.replace(rewrite, wikibase) req = urllib2.Request(uri, headers={'Accept': DOCBOOK_IMT}) with closing(urllib2.urlopen(req)) as resp: page = bindery.parse(resp) entrydate = dateparse(unicode(page.article.articleinfo.revhistory.revision.date)) if entrydate.tzinfo == None: entrydate = entrydate.replace(tzinfo=DEFAULT_TZ) output = os.path.join(outputdir, OUTPUTPATTERN%pathsegment(relative)) if os.access(output, os.R_OK): lastrev = dateparse(unicode(bindery.parse(output).entry.updated)) if lastrev.tzinfo == None: lastrev = lastrev.replace(tzinfo=DEFAULT_TZ) if (entrydate == lastrev): print >> sys.stderr, 'Not updated. Skipped...' continue print >> sys.stderr, 'Writing to ', output with open(output, 'w') as output: handle_page(uri, page, outputdir, relative, output) return
def read(self, request, *args, **kwargs): allowed_entities = { 'legislator': { 'model': NgramsByBioguide, 'field': 'bioguide_id', }, 'state': { 'model': NgramsByState, 'field': 'state', }, 'date': { 'model': NgramsByDate, 'field': 'date', }, 'month': { 'model': NgramsByMonth, 'field': 'month', }, } n = request.GET.get('n', 1) try: n = int(n) except ValueError: return {'error': 'Invalid phrase length.', 'results': []} if n > 5: return {'error': 'Invalid phrase length.', 'results': []} entity = request.GET.get('entity_type', '') if entity not in allowed_entities.keys(): return {'error': 'Invalid entity.', 'results': []} entity = allowed_entities[entity] val = request.GET.get('entity_value', '') if not val: return {'error': 'Invalid entity value.', 'results': []} sort = request.GET.get('sort', 'tfidf desc') if sort not in self.SORT_FIELDS.keys(): return {'error': 'Invalid sort field.', 'results': []} per_page, offset = self.get_pagination(request) model = entity['model'] field = entity['field'] if field == 'date' and val: try: dateparse(val) except ValueError: return {'error': 'Invalid date.', 'results': []} query = {'n': n} if val: query.update({field: val}) qset = model.objects.filter(**query) if sort: qset = qset.order_by(self.SORT_FIELDS[sort]) return qset[offset:offset+per_page]
def fromJSON(self, j):
    self.href = j['d:href']
    prop = j['d:propstat']['d:prop']
    self.name = prop['d:displayname']
    self.length = prop['d:getcontentlength']
    self.modified = dateparse(prop['d:getlastmodified'])
    self.created = dateparse(prop['d:creationdate'])
    return self
def test_agent_last_update_increases(volttron_instance):
    agent = volttron_instance.build_agent()
    s = json.loads(agent.vip.health.get_status())
    dt = dateparse(s['last_updated'], fuzzy=True)
    agent.vip.health.set_status(STATUS_UNKNOWN, 'Unknown now!')
    gevent.sleep(1)
    s = json.loads(agent.vip.health.get_status())
    dt2 = dateparse(s['last_updated'], fuzzy=True)
    assert dt < dt2
def __read_tasks(self, tlist, parent=None): '''Internal function to recursively add tasks from an XML file to the treestore self.tasklist.''' for task in tlist.iterfind('./task'): #make a list from subelements and attributes to add to the treestore tasks = [] tasks.append(int(task.get('priority'))) tasks.append(int(task.find('pct').text)) tasks.append(int(task.find('est').text)) tasks.append(int(task.find('spent').text)) est_begin_raw = task.find('est-begin').text if est_begin_raw is None: tasks.append("") else: tasks.append(dateparse(est_begin_raw)) est_complete_raw = task.find('est-complete').text if est_complete_raw is None: tasks.append("") else: tasks.append(dateparse(est_complete_raw)) act_begin_raw = task.find('act-begin').text if act_begin_raw is None: tasks.append("") else: tasks.append(dateparse(act_begin_raw)) completed_raw = task.find('completed').text if completed_raw is None: tasks.append("") else: tasks.append(dateparse(completed_raw)) due_raw = task.find('due').text if due_raw is None: tasks.append("") else: tasks.append(dateparse(due_raw)) assigner_raw = task.find('assigner').text if assigner_raw is None: assigner_raw = '' tasks.append(assigner_raw) assignee_raw = task.find('assignee').text if assignee_raw is None: assignee_raw = '' tasks.append(assignee_raw) status_raw = task.find('status').text if status_raw is None: status_raw = '' tasks.append(status_raw) done = task.get('done') == "True" tasks.append(done) tasks.append(task.find('title').text) notes_raw = task.find('notes').text if notes_raw is None: notes_raw = '' tasks.append(notes_raw) tasks.append(task.find('due').get('useTime') == "True") tasks.append(not done) #inverse done tasks.append(False) #time track flag #append to store treeiter = self.tasklist.append(parent, tasks) self.__read_tasks(task.find('tasklist'), treeiter)
def phrase_by_category(phrase, entity_type, start_date=None, end_date=None,
                       mincount=1, sort='false'):
    '''finds occurrences of a specific phrase by entity_type. expects dates in
    dd/mm/yyyy format. if 'start' and 'end' date are none, defaults to all time.
    the mincount argument controls whether counts are returned for all entities
    in the category, or only those with non-zero results.'''
    args = {}

    if isinstance(start_date, basestring):
        start_date = dateparse(start_date).strftime('%d/%m/%Y')
    if isinstance(end_date, basestring):
        end_date = dateparse(end_date).strftime('%d/%m/%Y')

    # set up the faceting. many of these query args need to be set using a
    # string variable for the key since they contain periods.
    args['facet'] = "true"

    if entity_type == 'legislator':
        field = 'speaker_bioguide'
    elif entity_type == 'state':
        field = 'speaker_state'
    elif entity_type == 'party':
        field = 'speaker_party'
    elif entity_type == 'bioguide':
        field = 'speaker_bioguide'
    else:
        raise NotImplementedError(entity_type)

    args['facet.field'] = field
    if mincount:
        args['facet.mincount'] = 1
    args['facet.sort'] = sort

    # default limit for # faceted fields returned is 100; we want to return for
    # all fields.
    facet_limit = 'facet.limit'
    args[facet_limit] = -1

    q = '''text:"%s"''' % phrase
    if start_date and end_date:
        start = as_solr_date(start_date)
        end = as_solr_date(end_date)
        daterange = '''date:[%s TO %s]''' % (start, end)
        q = '''(%s AND %s)''' % (q, daterange)
    args['q'] = q

    # return counts only, not the documents themselves
    args['rows'] = 0

    # do the api call
    json_resp = solr_api_call(args)

    # remove any cruft and format nicely.
    return json_resp
def generic_query(*args, **kwargs):
    q = []
    args = {}
    if 'date' in kwargs:
        date = dateparse(kwargs['date'])
        start = date.strftime('%d/%m/%Y')
        end = (date + datetime.timedelta(1)).strftime('%d/%m/%Y')
        q.append("date:[%s TO %s]" % (as_solr_date(start), as_solr_date(end)))
    elif 'start_date' in kwargs and 'end_date' in kwargs:
        start = dateparse(kwargs['start_date']).strftime('%d/%m/%Y')
        end = dateparse(kwargs['end_date']).strftime('%d/%m/%Y')
        q.append("date:[%s TO %s]" % (as_solr_date(start), as_solr_date(end)))
    if 'phrase' in kwargs:
        q.append('text:%s' % kwargs['phrase'])
    if 'congress' in kwargs:
        volumes = volume_lookup(kwargs['congress'], kwargs.get('session'))
        if not volumes:
            volumes = ['0', ]
        q.append('volume:(%s)' % ' OR '.join(volumes))
    if 'chamber' in kwargs:
        valid_chambers = ['house', 'senate', 'extensions', ]
        selected_chambers = []
        for chamber in kwargs['chamber'].lower().split('|'):
            if chamber in valid_chambers:
                selected_chambers.append(chamber)
        if selected_chambers:
            q.append('chamber:(%s)' % ' OR '.join(
                [x.title() for x in selected_chambers]))
    entities = {'state': 'speaker_state',
                'party': 'speaker_party',
                'legislator': 'speaker',
                'bioguide': 'speaker_bioguide',
                'cr_pages': 'pages',
                'volume': 'volume', }
    for k, v in entities.iteritems():
        if k in kwargs:
            q.append('%s:%s' % (v, kwargs[k]))
    if len(q):
        args['q'] = '(%s)' % ' AND '.join(q)
    else:
        args['q'] = '*:*'
    return args
def get_noao_query_kwargs(**kwargs):
    """ Get the NOAO download query. """
    # Some columns are required
    required = [
        'reference', 'release_date', 'start_date', 'filesize',
        'dtpropid', 'md5sum'
    ]
    defaults = dict(tstart=dateparse('2012-11-01'), tstop=date.today(),
                    exptime=30, filters=('u', 'g', 'r', 'i', 'z', 'Y'),
                    limit=250000, expnum='%')
    defaults['columns'] = [
        'reference', 'release_date', 'start_date', 'date_obs',
        'instrument', 'ra', 'dec', 'filter', 'exposure', 'obstype',
        'proctype', 'dtacqnam AS original_file',
        'reference AS archive_file', 'filesize',
    ]

    for k, v in defaults.items():
        kwargs.setdefault(k, v)

    kwargs['columns'] = map(str.lower, kwargs['columns'])
    kwargs['columns'] += [c for c in required if c not in kwargs['columns']]

    kwargs['tstart'] = dateparse(str(kwargs['tstart']))
    kwargs['tstop'] = dateparse(str(kwargs['tstop']))

    if not isinstance(kwargs['columns'], basestring):
        kwargs['columns'] = ','.join(kwargs['columns'])
    if not isinstance(kwargs['filters'], basestring):
        kwargs['filters'] = ','.join(["'%s'" % f for f in kwargs['filters']])
    if isinstance(kwargs['expnum'], int):
        kwargs['expnum'] = '{expnum:08d}'.format(**kwargs)

    return kwargs
def handle(self, *args, **options): if not args: raise CommandError('Invalid arguments, must provide: %s' % self.args) filename = args[0] print "Processing file '%s' " % (filename) infile = open(filename, 'r') create_new_ads = options['create'] if create_new_ads: print "Will create new ads when applicable" extra_rows = int(options['extra_rows']) if extra_rows: print "Disregarding first %s rows from csv file" % (extra_rows) # Skip the first n lines before looking for headers, if requested. for i in range(0,extra_rows): next(infile) reader = None excel = options['excel'] if excel: print "Trying to parse csv using excel dialect" reader = csv.DictReader(infile, dialect='excel') else: reader = csv.DictReader(infile) for row in reader: this_row_data = {} for key in default_options.keys(): try: this_row_data[key] = row[default_options[key]] except KeyError: this_row_data[key] = None # get date objects for the dates entered. Assumes the dates don't need additional transformation. if this_row_data['contract_start_date']: this_row_data['contract_start_date'] = dateparse(this_row_data['contract_start_date']) if this_row_data['contract_end_date']: this_row_data['contract_end_date'] = dateparse(this_row_data['contract_end_date']) if this_row_data['upload_time']: this_row_data['upload_time'] = dateparse(this_row_data['upload_time']).date() #print this_row_data handle_row_data(this_row_data, create_new_ads)
def ps(self): all_containers = [] # Collect the system containers for i in self.syscontainers.get_system_containers(): container = i["Id"] inspect_stdout = util.check_output(["runc", "state", container]) ret = json.loads(inspect_stdout) status = ret["status"] if not self.args.all and status != "running": continue image = i['Image'] command = "" created = dateparse(ret['created']).strftime("%F %H:%M") # pylint: disable=no-member all_containers.append({"type" : "systemcontainer", "container" : container, "image" : image, "command" : command, "created" : created, "status" : status, "runtime" : "runc"}) # Collect the docker containers for container in [x["Id"] for x in self.d.containers(all=self.args.all)]: ret = self._inspect_container(name=container) status = ret["State"]["Status"] image = ret['Config']['Image'] command = u' '.join(ret['Config']['Cmd']) if ret['Config']['Cmd'] else "" created = dateparse(ret['Created']).strftime("%F %H:%M") # pylint: disable=no-member all_containers.append({"type" : "docker", "container" : container, "image" : image, "command" : command, "created" : created, "status" : status, "runtime" : "Docker"}) if self.args.json: self.write_out(json.dumps(all_containers)) return col_out = "{0:12} {1:20} {2:20} {3:16} {4:9} {5:10}" if self.args.heading: self.write_out(col_out.format("CONTAINER ID", "IMAGE", "COMMAND", "CREATED", "STATUS", "RUNTIME")) for container in all_containers: self.write_out(col_out.format(container["container"][0:12], container["image"][0:20], container["command"][0:20], container["created"][0:16], container["status"][0:9], container["runtime"][0:10]))
def scrape_csv_file():
    url = 'http://www.fdic.gov/regulations/meetings/vlog.csv'
    reader = csv.DictReader(urllib2.urlopen(url))
    return [{
        'staff': parse_staff(row['Person Visited']),
        'meeting_time': dateparse(row['Date']),
        'disclosure_time': dateparse(row['Creation Date']),
        'organizations': parse_organizations(row['Affiliation']),
        'visitors': parse_visitors(row['Visitor']),
        'material_provided': row['Material Provided'],
        'description': row['Issues Discussed'].replace(':', '; ')
    } for row in reader if row['Status'].strip() == 'PUBLISH']
def main(args):
    ''' Generates a CSV for the issues for a specific repo '''
    log.info('Initializing %s', args.filename)
    start = dateparse(args.start_date)
    end = dateparse(args.end_date)
    log.info('Getting issues from github %s', args.repository)
    response = issues(args.repository, start, end)
    rows = [(str(r['number']), r['title'], r['state'], str(r['closed_at']))
            for r in response]
    with open(args.filename, 'wb') as csvfile:
        writer = UnicodeWriter(csvfile)
        writer.writerow(('Issue Number', 'Title', 'Status', 'Closed At'))
        writer.writerows(rows)
    log.info('Done')
def parseyears(relationship):
    try:
        startyear = int(relationship['commenced dating'])
        endyear = int(relationship['separated'])
    except ValueError:
        startyear = dateparse(relationship['commenced dating']).year
        endyear = dateparse(relationship['separated']).year
    except KeyError:
        startyear = None
        endyear = None
    finally:
        try:
            return startyear, endyear
        except UnboundLocalError:
            return startyear, None
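# Hypothetical relationship dicts illustrating parseyears above (the real
# records presumably come from scraped data):
print(parseyears({'commenced dating': '1994', 'separated': 'June 1998'}))
# -> (1994, 1998): int() fails on 'June 1998', so both fields fall back to
#    dateparse(...).year
print(parseyears({'commenced dating': '2001'}))
# -> (None, None): the missing 'separated' key raises KeyError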
def fromXML(self, j):
    [href, propstat] = j.getchildren()
    self.href = href.text
    [status, prop] = propstat.getchildren()
    for x in prop:
        tag = x.tag.replace(D, '')
        if tag == 'displayname':
            self.name = x.text
        elif tag == 'getcontentlength':
            self.length = int(x.text)
        elif tag == 'getlastmodified':
            self.modified = dateparse(x.text)
        elif tag == 'creationdate':
            self.created = dateparse(x.text)
    return self
def rstrip_date(s):
    date = re.search(r'.+?(\([^(]+)$', s)
    if date is not None:
        possible_date = date.group(1).split(')')[0].strip(', \n(')
        if possible_date.isdigit():
            if re.search(r'^[12]\d\d\d$', possible_date) is not None:
                return s[:date.start(1)].strip()
            else:
                return s
        try:
            dateparse(possible_date)
            s = s[:date.start(1)].strip()
        except (ValueError, TypeError):
            pass
    return s
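# Hypothetical inputs illustrating rstrip_date above: a trailing parenthesized
# year or parseable date is stripped, anything else is left alone.
print(rstrip_date('The Matrix (1999)'))              # -> 'The Matrix'
print(rstrip_date('Release notes (March 3, 2012)'))  # -> 'Release notes'
print(rstrip_date('foo (bar)'))                      # -> 'foo (bar)', unchanged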
def _format_bill(bill): bill = bill.copy() btype = bill_type_for(bill['bill_id']) bnumber = bill.get('number') or bill_number_for(bill['bill_id']) bdate = bill.get('legislative_day') or bill.get('last_action_at') try: bdate = dateparse(bdate).strftime('%B %e') except: bdate = 'unknown date' title = (bill.get('popular_title') or bill.get('short_title') or bill.get('official_title') or '') ctx = bill.get('context', []) bill['summary'] = bill.get('summary') or '' bill_context = { 'date': bdate, 'chamber': bill['chamber'], 'bill_type': btype, 'bill_number': bnumber, 'bill_title': title.encode('ascii', 'ignore'), 'bill_description': '\n'.join(ctx).encode('ascii', 'ignore'), } if len(bill.get('actions', [])): bill_context.update(bill_status="%s on %s" % (bill['last_action'].get('text'), dateparse(bill['last_action'].get('acted_at')).strftime('%B %e, %Y'))) else: bill_context.update(bill_status='No known actions taken yet.') sponsor = bill.get('sponsor') if sponsor: sponsor_party = sponsor.get('party') sponsor_state = sponsor.get('state') if sponsor_party and sponsor_state: bill_context.update(sponsor="Sponsored by: %s, %s, %s" % (_format_legislator(sponsor)['fullname'], party_for(sponsor['party']), state_for(sponsor['state']))) else: bill_context.update(sponsor="Sponsored by: %s" % _format_legislator(sponsor)['fullname']) cosponsors = bill.get('cosponsors', []) if len(cosponsors): bill_context.update(cosponsors="Cosponsored by: %s" % ', '.join(["%s, %s, %s" % (_format_legislator(cs)['fullname'], party_for(cs['party']), state_for(cs['state'])) for cs in cosponsors])) bill.update(bill_context=bill_context) return bill
def discover(s):
    if not s:
        return null
    for f in string_coercions:
        try:
            return discover(f(s))
        except:
            pass
    # don't let dateutil parse things like sunday, monday etc into dates
    if s.isalpha():
        return string
    try:
        d = dateparse(s)
        if not d.time():
            return date_
        if not d.date():
            return time_
        return datetime_
    except:
        pass
    return string
def skeda_from_f133(data_dict, filing_number, header_row_id, is_amended, cd):
    data_dict['transaction_id'] = data_dict['transaction_id'][:20]
    data_dict['header_id'] = header_row_id
    data_dict['superceded_by_amendment'] = is_amended
    data_dict['filing_number'] = filing_number

    # map refund to contributions
    data_dict['contribution_amount'] = data_dict['refund_amount']
    data_dict['contribution_date'] = data_dict['refund_date']

    # so logging doesn't complain about unexpected value
    del data_dict['refund_date']
    del data_dict['refund_amount']

    data_dict['contribution_amount'] = validate_decimal(data_dict['contribution_amount'])

    # flip signs if this is positive.
    if data_dict['contribution_amount'] > 0:
        data_dict['contribution_amount'] = 0 - data_dict['contribution_amount']

    if data_dict['contribution_date']:
        try:
            data_dict['contribution_date_formatted'] = dateparse(data_dict['contribution_date'])
        except ValueError:
            # if we can't parse the date, just ignore it.
            pass

    cd.writerow('A', data_dict)
def get_context_data(self, **kwargs):
    context = super(SingleDayView, self).get_context_data(**kwargs)
    session_list = self.get_queryset()
    for sess in list(session_list):
        sess.start_time = sess.start_time.astimezone(timezone.get_current_timezone())
    locations = Location.objects.with_sessions().filter(
        event=self.event,
        sessions__in=context['session_list']
    ).distinct()
    try:
        lunchtime = self.get_queryset().filter(
            title__istartswith='lunch')[0].start_time.astimezone(
                timezone.get_current_timezone())
    except IndexError:
        lunchtime = None
    timeslots = self.request.GET.get('timeslots', '').split(',')
    timeslots = [dateparse(time).time() for time in timeslots]
    context['session_list'] = session_list
    context['event'] = self.event
    context['locations'] = locations
    context['timeslots'] = timeslots
    context['lunchtime'] = lunchtime
    context['now'] = timezone.now().astimezone(timezone.get_current_timezone())
    context['now_minus_session_length'] = context['now'] - context['event'].session_length
    return context
def _process_change(payload, user, repo, repo_url, project, event, codebase=None): """ Consumes the JSON as a python object and actually starts the build. :arguments: payload Python Object that represents the JSON sent by GitHub Service Hook. """ changes = [] refname = payload['ref'] # We only care about regular heads or tags match = re.match(r"^refs/(heads|tags)/(.+)$", refname) if not match: log.msg("Ignoring refname `%s': Not a branch" % refname) return changes branch = match.group(2) if payload.get('deleted'): log.msg("Branch `%s' deleted, ignoring" % branch) return changes for commit in payload['commits']: if not commit.get('distinct', True): log.msg('Commit `%s` is a non-distinct commit, ignoring...' % (commit['id'],)) continue files = [] for kind in ('added', 'modified', 'removed'): files.extend(commit.get(kind, [])) when_timestamp = dateparse(commit['timestamp']) log.msg("New revision: %s" % commit['id'][:8]) change = { 'author': '%s <%s>' % (commit['author']['name'], commit['author']['email']), 'files': files, 'comments': commit['message'], 'revision': commit['id'], 'when_timestamp': when_timestamp, 'branch': branch, 'revlink': commit['url'], 'repository': repo_url, 'project': project, 'properties': { 'event': event, }, } if codebase is not None: change['codebase'] = codebase changes.append(change) return changes
def readfile(filelocation):
    fh = open(filelocation, 'r')
    reader = csv.DictReader(fh)
    count = 0
    for newstyle_row in reader:
        row = transform_column_headers(newstyle_row)
        try:
            thiscom = f1filer.objects.get(cmte_id=row['cmte_id'])
        except f1filer.DoesNotExist:
            print "Creating %s %s" % (row['cmte_id'], row['cmte_nm'])
            # first create the f1filer object:
            row['cycle'] = CURRENT_CYCLE
            try:
                row['receipt_dt'] = dateparse(row['receipt_dt'])
            except:
                print "can't parse original receipt date='%s', skipping" % (row['receipt_dt'])
                continue
            try:
                del row[None]
            except KeyError:
                pass
            print row
            thisf1 = f1filer(**row)
            thisf1.save()
            ## if we are creating a new f1, check if it's a committee and if not, create one.
            make_committee_from_f1filer(row['cmte_id'], row['cycle'])
def handle(self, *args, **options):
    event = Event.objects.get(pk=int(options.get('event_id')))
    timeslot = options.get('timeslot')
    skipdelta = options.get('skipdelta')
    if skipdelta:
        skipdelta = datetime.timedelta(seconds=int(options.get('skipdelta')))
    else:
        skipdelta = None
    if timeslot == 'next':
        sessions = Session.objects.next().filter(event=event)
        timeslot = sessions[0].start_time
    else:
        timeslot = dateparse(timeslot).replace(tzinfo=timezone.get_current_timezone())
    if skipdelta is not None and timezone.now() + skipdelta < timeslot:
        print 'Sessions are too far in the future, aborting.'
        return
    try:
        tweet = SessionBlockTweet.objects.get(event=event, timeslot=timeslot,
                                              previous=None, sent_at=None)
    except SessionBlockTweet.DoesNotExist:
        print ('No tweets have been generated for this timeslot, or tweets have '
               'been sent already. Run ./manage.py generatetweets --event-id=%s '
               '--timeslot=%s and try again' % (event.id, timeslot.isoformat()))
        return
    tweet.send()
    print 'Sent %d tweets for block %s.' % (tweet.total, timeslot.isoformat())
def format_tweet(tweet, tweeter_db_id):
    t = dict(
        text=tweet['text'],
        created_at=dateparse(tweet['created_at']),
        favorite_counts=tweet['favorite_count'],
        retweet_counts=tweet['retweet_count'],
        tweeter_id=tweeter_db_id
    )
    return Tweet(**t)
def version_dates(self):
    ret = OrderedDict()
    for release, info in self.release_info:
        if info:
            upload_time = dateparse(info[0]['upload_time'])
            ret[release] = upload_time
    return ret
def handle_pull_request(self, payload):
    changes = []
    number = payload["number"]
    commits = payload["pull_request"]["commits"]

    log.msg("Processing GitHub PR #%d" % number, logLevel=logging.DEBUG)

    action = payload.get("action")
    if action not in ("opened", "reopened", "synchronize"):
        log.msg("GitHub PR #%d %s, ignoring" % (number, action))
        return changes, "git"

    change = {
        "revision": payload["pull_request"]["head"]["sha"],
        "when_timestamp": dateparse(payload["pull_request"]["created_at"]),
        "branch": payload["pull_request"]["head"]["ref"],
        "revlink": payload["pull_request"]["_links"]["html"]["href"],
        "repository": payload["repository"]["clone_url"],
        "category": "pull",
        # TODO: Get author name based on login id using txgithub module
        "author": payload["sender"]["login"],
        "comments": "GitHub Pull Request #%d (%d commit%s)" % (
            number, commits, "s" if commits != 1 else ""),
    }

    if callable(self._codebase):
        change["codebase"] = self._codebase(payload)
    elif self._codebase is not None:
        change["codebase"] = self._codebase

    changes.append(change)

    log.msg("Received %d changes from GitHub PR #%d" % (len(changes), number))
    return changes, "git"
def create_action_details_from_trace (self, project, labels, time_ranges, threads, trace_info, extended_info): if (type(labels) is not list): return [] results = {} first_paint_time = None dom_content_loaded_time = None load_time = None create_action_if_needed = (len(labels) == 0) to_save = [] def sum(l): total = 0 for v in l: total += v return total # Default the trace date to the time the blob was uploaded. trace_date = trace_info.date if ('datetime' in extended_info): try: # Get the date, parse it, and set back to UTC for AppEngine trace_date = dateparse(extended_info['datetime']) trace_date = trace_date.replace(tzinfo=None) + trace_date.utcoffset() except Exception, e: # Fail nicely trace_date = trace_info.date
def handle_release(self, payload, event):
    repo_url = payload['repository']['html_url']
    release = payload['release']['tag_name']
    project = payload['repository']['full_name']
    title = u'{} [{} release {}]'.format(payload['release']['name'], project, release)
    comment = payload['release']['body']

    change = {
        'author': u'{} <{}>'.format(payload['release']['author']['full_name'],
                                    payload['release']['author']['email']),
        'branch': release,
        'category': 'release',
        'comments': u'{}\n{}'.format(title, comment),
        'revision': release,
        'when_timestamp': dateparse(payload['release']['created_at']),
        'repository': payload['repository']['clone_url'],
        'project': project,
        'properties': {
            'action': payload['action'],
            'draft': payload['release']['draft'],
            'prerelease': payload['release']['prerelease'],
            'event': event,
        },
        'revlink': u'{}/src/{}'.format(repo_url, release),
    }

    return [change]
def get_commit(self, commit):
    files = []
    for kind in ('added', 'modified', 'removed'):
        files.extend(commit.get(kind, []))
    change = {
        'author': u'{} <{}>'.format(commit['author']['name'],
                                    commit['author']['email']),
        'files': files,
        'comments': commit['message'],
        'revision': commit['id'],
        'when_timestamp': dateparse(commit['timestamp']),
        'revlink': commit['url'],
    }
    return change
def day(args, *extra, **kwargs): parser = argparse.ArgumentParser() parser.add_argument( 'username', help='The MyFitnessPal username for which to delete a stored password.' ) parser.add_argument('date', nargs='?', default=datetime.now().strftime('%Y-%m-%d'), type=lambda datestr: dateparse(datestr).date(), help=u'The date for which to display information.') args = parser.parse_args(extra) password = get_password_from_keyring_or_interactive(args.username) client = Client(args.username, password) day = client.get_date(args.date) t = Terminal() print(t.blue(args.date.strftime('%Y-%m-%d'))) for meal in day.meals: print(t.bold(meal.name.title())) for entry in meal.entries: print(u'* {entry.name}'.format(entry=entry)) print( t.italic_bright_black( u' {entry.nutrition_information}'.format(entry=entry))) print(u'') print(t.bold("Totals")) for key, value in day.totals.items(): print(u'{key}: {value}'.format( key=key.title(), value=value, )) print(u'Water: {amount}'.format(amount=day.water)) if day.notes: print(t.italic(day.notes))
def day(args, *extra, **kwargs): parser = argparse.ArgumentParser() parser.add_argument( "username", help="The MyFitnessPal username for which to delete a stored password.", ) parser.add_argument( "date", nargs="?", default=datetime.now().strftime("%Y-%m-%d"), type=lambda datestr: dateparse(datestr).date(), help="The date for which to display information.", ) args = parser.parse_args(extra) password = get_password_from_keyring_or_interactive(args.username) client = Client(args.username, password) day = client.get_date(args.date) date_str = args.date.strftime("%Y-%m-%d") print(f"[blue]{date_str}[/blue]") for meal in day.meals: print(f"[bold]{meal.name.title()}[/bold]") for entry in meal.entries: print(f"* {entry.name}") print(f" [italic bright_black]{entry.nutrition_information}" f"[/italic bright_black]") print("") print("[bold]Totals[/bold]") for key, value in day.totals.items(): print("{key}: {value}".format( key=key.title(), value=value, )) print(f"Water: {day.water}") if day.notes: print(f"[italic]{day.notes}[/italic]")
def getChanges(self, request):
    """Catch a POST request from BitBucket and start a build process

    Check the URL below if you require more information about payload
    https://confluence.atlassian.com/display/BITBUCKET/POST+Service+Management

    :param request: the http request Twisted object
    :param options: additional options
    """
    event_type = request.getHeader(_HEADER_EVENT)
    event_type = bytes2unicode(event_type)
    payload = json.loads(bytes2unicode(request.args[b'payload'][0]))
    repo_url = '{}{}'.format(
        payload['canon_url'], payload['repository']['absolute_url'])
    project = request.args.get(b'project', [b''])[0]
    project = bytes2unicode(project)

    changes = []
    for commit in payload['commits']:
        changes.append({
            'author': commit['raw_author'],
            'files': [f['file'] for f in commit['files']],
            'comments': commit['message'],
            'revision': commit['raw_node'],
            'when_timestamp': dateparse(commit['utctimestamp']),
            'branch': commit['branch'],
            'revlink': '{}commits/{}'.format(repo_url, commit['raw_node']),
            'repository': repo_url,
            'project': project,
            'properties': {
                'event': event_type,
            },
        })
        log.msg('New revision: {}'.format(commit['node']))

    log.msg('Received {} changes from bitbucket'.format(len(changes)))
    return (changes, payload['repository']['scm'])
def get_initial_queryset(self):
    calls = PhoneCall.objects.get_phone_calls_for_user(
        self.request.user).prefetch_related('content_object')
    filter_kwargs = {}
    if self.request.GET.get('call_date'):
        filter_kwargs['call_date'] = dateparse(
            self.request.GET.get('call_date')).strftime('%Y-%m-%d')
    if self.request.GET.get('city'):
        filter_kwargs['city'] = self.request.GET.get('city')
    if self.request.GET.get('state'):
        filter_kwargs['state'] = self.request.GET.get('state')
    if self.request.GET.get('zip'):
        filter_kwargs['zip'] = self.request.GET.get('zip')
    if self.request.GET.get('handraises'):
        filter_kwargs['handraises__isnull'] = False
    return calls.filter(**filter_kwargs)
def skeda_from_skedadict(data_dict, filing_number, header_row_id, is_amended, cd):
    """ We can either pass the header row in or not; if not, look it up."""
    data_dict['transaction_id'] = data_dict['transaction_id'][:20]
    data_dict['header_id'] = header_row_id
    data_dict['superceded_by_amendment'] = is_amended
    data_dict['filing_number'] = filing_number

    if data_dict['contribution_date']:
        try:
            data_dict['contribution_date_formatted'] = dateparse(
                data_dict['contribution_date'])
        except ValueError:
            # if we can't parse the date, just ignore it.
            pass

    data_dict['contribution_amount'] = validate_decimal(
        data_dict['contribution_amount'])
    data_dict['contribution_aggregate'] = validate_decimal(
        data_dict['contribution_aggregate'])

    cd.writerow('A', data_dict)
def parse_states(fpath):
    state_map = {'H': 'Home', 'M': 'Maze', 'St': 'LDstim', 'O': 'Old open field',
                 'Oc': 'Old open field w/ curtain', 'N': 'New open field',
                 'Ns': 'New open field w/ LDstim',
                 '5hS': '5 hrs of 1 sec every 5 sec', 'L': 'Large open field',
                 'X': 'Extra large open field', 'Nc': 'New open field w/ curtain'}

    subject_path, fname = os.path.split(fpath)
    fpath_base, fname = os.path.split(subject_path)
    subject_id, date_text = fname.split('-')
    session_date = dateparse(date_text, yearfirst=True)
    mouse_num = ''.join(filter(str.isdigit, subject_id))

    exp_sheet_path = os.path.join(subject_path, 'YM' + mouse_num + ' exp_sheet.xlsx')
    df = pd.read_excel(exp_sheet_path, sheet_name=1)
    state_ids = df[df['implanted'] == session_date].values[0, 2:15]

    statepath = os.path.join(fpath, 'EEGlength')
    state_times = pd.read_csv(statepath).values
    states = [state_map[x] for x, _ in zip(state_ids, state_times)]

    return states, state_times
def _check_row(self, row):
    """Checks a row of cells against the current rules in self.row_rules.

    Args:
        row: List of cells

    Returns:
        True/False
    """
    for rule in self.row_rules:
        try:
            if rule['val_type'] == 'date':
                v = float(dateparse(row[rule['col'] - 1]).strftime('%s'))
            elif rule['val_type'] == 'number':
                v = float(row[rule['col'] - 1])
            else:
                v = str(row[rule['col'] - 1])
            if not rule['op'](v):
                return False
        except:
            return False
    return True
def parse_url_for_timestamp(url):
    parsed_url = urlparse.urlparse(url)
    qs = urlparse.parse_qs(parsed_url.query)
    raw_time = None
    if 't' in qs:
        raw_time = qs['t'][0]
    elif 't=' in parsed_url.fragment:
        raw_time = parsed_url.fragment.split('=')[1]  # assume only one frag
    result = (0, 0)
    if raw_time:
        try:
            raw = dateparse(raw_time)
            result = raw.minute, raw.second
        except Exception as e:
            print 'date error:', e
            result = (0, 0)
    else:
        print 'resulting time', result
    return result
def read_mail_file(filename):
    """parse and extract features from the given email file"""
    headers = email.parser.Parser().parse(open(filename, "r"), headersonly=True)
    subject = headers['subject']
    words = set(subject.split())
    sender = headers['from']
    # NB: split-n-strip is not the most robust way to parse emails out of a list. We're
    # hoping that the email-normalization that was done to this dataset makes up for that.
    raw_recipients = headers.get_all('to', []) + \
                     headers.get_all('cc', []) + \
                     headers.get_all('bcc', [])
    recipients = set(address.strip()
                     for field in raw_recipients
                     for address in field.split(","))
    msgdate = dateparse(headers.get('date', a_bad_time))
    day = msgdate.strftime("%Y-%m-%d")
    return EmailRecord(sender, recipients, msgdate, day, subject, words,
                       headers.defects, filename)
def discover(s):
    if not s:
        return null
    for f in string_coercions:
        try:
            return discover(f(s))
        except (ValueError, KeyError):
            pass
    # don't let dateutil parse things like sunday, monday etc into dates
    if s.isalpha() or s.isspace():
        return string
    try:
        d = dateparse(s)
    except (ValueError, OverflowError):  # OverflowError for stuff like 'INF...'
        pass
    else:
        return date_ if is_zero_time(d.time()) else datetime_
    return string
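# The date-vs-datetime test above relies on dateutil returning midnight for a
# bare date string. A standalone sketch of that check (is_zero_time is assumed
# to be roughly this test):
import datetime
from dateutil.parser import parse as dateparse

def _looks_like_pure_date(s):
    # midnight with no explicit time component is treated as a plain date
    d = dateparse(s)
    return d.time() == datetime.time(0, 0)

print(_looks_like_pure_date('2014-07-04'))        # True  -> date_
print(_looks_like_pure_date('2014-07-04 13:15'))  # False -> datetime_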
def handle_pull_request(self, payload): changes = [] number = payload['number'] refname = 'refs/pull/%d/merge' % (number,) commits = payload['pull_request']['commits'] log.msg('Processing GitHub PR #%d' % number, logLevel=logging.DEBUG) action = payload.get('action') if action not in ('opened', 'reopened', 'synchronize'): log.msg("GitHub PR #%d %s, ignoring" % (number, action)) return changes, 'git' change = { 'revision': payload['pull_request']['head']['sha'], 'when_timestamp': dateparse(payload['pull_request']['created_at']), 'branch': refname, 'revlink': payload['pull_request']['_links']['html']['href'], 'repository': payload['repository']['html_url'], 'project': payload['pull_request']['base']['repo']['full_name'], 'category': 'pull', # TODO: Get author name based on login id using txgithub module 'author': payload['sender']['login'], 'comments': 'GitHub Pull Request #%d (%d commit%s)' % ( number, commits, 's' if commits != 1 else ''), } if callable(self._codebase): change['codebase'] = self._codebase(payload) elif self._codebase is not None: change['codebase'] = self._codebase changes.append(change) log.msg("Received %d changes from GitHub PR #%d" % ( len(changes), number)) return changes, 'git'
def get_hhapps_client(name): access_token = None expiry_date = None user = current_user if user.is_authenticated: access_token = user.token.get('access-token', None) expiry_date = dateparse(user.token.get('expiry-date')) else: return None schemas = app.config.get('HHCLIENT_APPS_SCHEMAS', {}) hhapps = app.config.get('HHCLIENT_APPS_ATTRIBUTES', {}) now = datetime.datetime.utcnow() if expiry_date and now > expiry_date: refresh_token() access_token = session['token']['access-token'] if name not in hhapps: application = get_application(name) hhapps[name] = application.data app.config['HHCLIENT_APPS_ATTRIBUTES'] = hhapps print('apps', hhapps[name]['publicurl']) Client = get_client(name) client = Client(api_url=hhapps[name]['publicurl'], access_token=access_token, schemas=schemas.get(name, None)) if name not in schemas: if 'HHCLIENT_APPS_SCHEMAS' not in app.config: app.config['HHCLIENT_APPS_SCHEMAS'] = {} app.config['HHCLIENT_APPS_SCHEMAS'][name] = client.schemas return client
def init_hhclient(): if g.get('hhclient', None): return name = None password = None access_token = None expiry_date = None user = current_user # auth = session.get('auth', None) # print(user.data) if user.is_authenticated: access_token = user.token.get('access-token', None) expiry_date = dateparse(user.token.get('expiry-date')) schemas = app.config.get('HHCLIENT_SCHEMAS', None) host = app.config.get('HHCLIENT_HOST') port = app.config.get('HHCLIENT_PORT') secure = app.config.get('HHCLIENT_SECURE') now = datetime.datetime.utcnow() if expiry_date and now > expiry_date: refresh_token() access_token = session['token']['access-token'] Client = get_client('hhservice') g.hhclient = Client(name=name, password=password, host=host, port=port, secure_connection=secure, access_token=access_token, schemas=schemas) if not schemas: app.config['HHCLIENT_SCHEMAS'] = g.hhclient.schemas
def save_fec_contribution(committee, contribution):
    try:
        contribution = Contribution.objects.create(
            committee=committee,
            filing_number=contribution['FECTransID'],
            name=contribution['Contrib'],
            date=dateparse(contribution['Date']),
            employer=contribution['Orgname'],
            occupation=contribution['Occ_EF'],
            street1=contribution['Street'],
            street2='',
            city=contribution['City'],
            state=contribution['State'],
            zipcode=contribution['Zip'],
            amount=contribution['Amount'],
            aggregate=0,
            memo='',
            url='',
            data_row='')
    except IntegrityError:
        print 'Skipping'
        return None
    return contribution
def modifyData(self, v):
    '''Modify/fix up data if necessary.'''
    if isinstance(v, unicode):
        # Convert ISO 8601 dates in unicode to datetime:
        # e.g. 2013-12-15T13:31:21.000+0000
        if len(v) >= 17:
            # Quick parse:
            if v[4] + v[7] + v[10] + v[13] + v[16] == '--T::':
                return dateparse(v).replace(tzinfo=None)
        # Convert unicode dates to datetime
        # e.g. 2013-12-15
        elif len(v) == 10:
            # Quick parse:
            if v[4] + v[7] == '--' and (v[0:4] + v[5:7] + v[8:10]).isdigit():
                return str2datetime(v)
    # No change
    return v
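# A quick sanity check of the positional "quick parse" test above: for an
# ISO 8601 timestamp, the characters at offsets 4, 7, 10, 13 and 16 are
# exactly '-', '-', 'T', ':', ':'.
v = u'2013-12-15T13:31:21.000+0000'
print(v[4] + v[7] + v[10] + v[13] + v[16])  # prints '--T::'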
def get_metadata(self): """Auto-fill all relevant metadata used in run_conversion.""" session_path = self.data_interface_objects["GrosmarkLFP"].input_args["folder_path"] subject_path, session_id = os.path.split(session_path) if "_" in session_id: subject_id, date_text = session_id.split("_") session_start = dateparse(date_text[-4:] + date_text[:-4]) xml_filepath = os.path.join(session_path, "{}.xml".format(session_id)) root = et.parse(xml_filepath).getroot() n_total_channels = int(root.find("acquisitionSystem").find("nChannels").text) shank_channels = [ [int(channel.text) for channel in group.find("channels")] for group in root.find("spikeDetection").find("channelGroups").findall("group") ] all_shank_channels = np.concatenate(shank_channels) all_shank_channels.sort() spikes_nsamples = int(root.find("neuroscope").find("spikes").find("nSamples").text) lfp_sampling_rate = float(root.find("fieldPotentials").find("lfpSamplingRate").text) shank_electrode_number = [x for channels in shank_channels for x, _ in enumerate(channels)] shank_group_name = ["shank{}".format(n + 1) for n, channels in enumerate(shank_channels) for _ in channels] cell_filepath = os.path.join(session_path, "{}.spikes.cellinfo.mat".format(session_id)) if os.path.isfile(cell_filepath): cell_info = loadmat(cell_filepath)["spikes"] celltype_mapping = {"pE": "excitatory", "pI": "inhibitory"} celltype_filepath = os.path.join(session_path, "{}.CellClass.cellinfo.mat".format(session_id)) if os.path.isfile(celltype_filepath): celltype_info = [ str(celltype_mapping[x[0]]) for x in loadmat(celltype_filepath)["CellClass"]["label"][0][0][0] ] device_name = "implant" metadata = dict( NWBFile=dict( identifier=session_id, session_start_time=session_start.astimezone(), file_create_date=datetime.now().astimezone(), session_id=session_id, institution="NYU", lab="Buzsaki", ), Subject=dict( subject_id=subject_id, ), BuzsakiNoRecording=dict( Ecephys=dict( subset_channels=all_shank_channels, Device=[dict(name=device_name)], ElectrodeGroup=[ dict( name=f"shank{n+1}", description=f"shank{n+1} electrodes", device_name=device_name, ) for n, _ in enumerate(shank_channels) ], Electrodes=[ dict( name="shank_electrode_number", description="0-indexed channel within a shank.", data=shank_electrode_number, ), dict( name="group", description="A reference to the ElectrodeGroup this electrode is a part of.", data=shank_group_name, ), dict( name="group_name", description="The name of the ElectrodeGroup this electrode is a part of.", data=shank_group_name, ), ], ) ), NeuroscopeSorting=dict( UnitProperties=[ dict( name="cell_type", description="name of cell type", data=celltype_info, ), dict( name="global_id", description="global id for cell for entire experiment", data=[int(x) for x in cell_info["UID"][0][0][0]], ), dict( name="shank_id", description="0-indexed id of cluster from shank", # - 2 b/c the 0 and 1 IDs from each shank have been removed data=[int(x - 2) for x in cell_info["cluID"][0][0][0]], ), dict( name="electrode_group", description="the electrode group that each spike unit came from", data=["shank" + str(x) for x in cell_info["shankID"][0][0][0]], ), dict( name="region", description="brain region where unit was detected", data=[str(x[0]) for x in cell_info["region"][0][0][0]], ), ] ), GrosmarkLFP=dict(), GrosmarkBehavior=dict(), ) return metadata
def __init__(self, stringdate=None):
    if stringdate is None:
        self.date = datetime.utcnow()
    else:
        self.date = dateparse(stringdate)
    # datetime.replace() returns a new object, so assign the result back
    self.date = self.date.replace(tzinfo=pytz.UTC)
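# Standalone sketch of the timezone handling above (pytz and dateutil assumed):
import pytz
from dateutil.parser import parse as dateparse

d = dateparse('2015-06-01 12:00:00')  # naive datetime
d = d.replace(tzinfo=pytz.UTC)        # replace() returns a new, aware datetime
print(d.isoformat())                  # 2015-06-01T12:00:00+00:00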
def handle(self, *args, **options): if not options.get('file', False): print('--file is required') exit(1) wb = load_workbook(options['file']) ws = wb.get_active_sheet() rowcount = 0 savedcount = 0 date_signed = None if options.get('date_signed', False): try: date_signed = dateparse(options.get('date_signed')) except ValueError: print('Invalid date for date_signed!') exit(1) for row in ws.rows: dt_signed = None if row[12].value: if isinstance(row[12].value, date) or isinstance( row[12].value, datetime): dt_signed = row[12].value elif isinstance(row[12].value, str): try: dt_signed = dateparse(row[12].value.strip()) except ValueError: print( 'INVALID VALUE FOR DATE/TIME SIGNED! Skipping ...') continue if date_signed: if dt_signed.date() < date_signed.date(): print('dt_signed older than date_signed. Skipping ...') continue email = None if row[4].value and isinstance(row[4].value, str): try: email = row[4].value.strip().lower() validate_email(email) except ValidationError: print('INVALID EMAIL! {} Skipping ...'.format( row[4].value)) continue first_name = None if row[2].value and isinstance(row[2].value, str): first_name = row[2].value.strip().title() last_name = None if row[3].value and isinstance(row[3].value, str): last_name = row[3].value.strip().title() zip = None non_conforming_zip = None if row[8].value: zip = str(row[8].value).strip() # if it's only 4 characters and all numbers, throw in a leading 0 if len(zip) == 4: try: int(zip) zip = '0{}'.format(zip) except ValueError: pass # if it's 9 characters and all numbers, throw in a - if len(zip) == 9: try: int(zip) zip = '{}-{}'.format(zip[:5], zip[5:]) except ValueError: pass # if we get to here and still don't have a valid zip, we'll throw it in the non-conforming field if not re.match(ZIP_REGEX, zip): non_conforming_zip = zip zip = None city = None state = None if zip: try: zipcode = ZipCode.objects.get(zip=zip[:5]) city = zipcode.city state = zipcode.state except ZipCode.DoesNotExist: print('COULD NOT DETERMINE STATE FROM ZIP {}!'.format(zip)) try: PetitionSigner.objects.get(email__iexact=email) print( 'PETITION SIGNER FOR EMAIL {} EXISTS! Skipping ...'.format( email)) continue except PetitionSigner.DoesNotExist: print('NO PETITION SIGNER FOR EMAIL {}! Creating ...'.format( email)) PetitionSigner.objects.create( first_name=first_name, last_name=last_name, email=email, city=city, state=state, zip=zip, non_conforming_zip=non_conforming_zip, dt_signed=dt_signed) savedcount += 1 # if the petition signer is also a volunteer, update the signed date try: volunteer = Volunteer.objects.get(email__iexact=email) print( 'Volunteer found for email {}! Updating petition signed date ...' .format(email)) volunteer.dt_signed_petition = dt_signed volunteer.save() except Volunteer.DoesNotExist: pass rowcount += 1 print( '*** PROCESSING COMPLETE. {} rows processed, {} petition signers created.' .format(rowcount, savedcount))
def get_collection_items(self, headers, args, dataset, pathinfo=None): """ Queries feature collection :param headers: dict of HTTP headers :param args: dict of HTTP request parameters :param dataset: dataset name :param pathinfo: path location :returns: tuple of headers, status code, content """ headers_ = HEADERS.copy() properties = [] reserved_fieldnames = [ 'bbox', 'f', 'limit', 'startindex', 'resulttype', 'datetime', 'sortby' ] formats = FORMATS formats.extend(f.lower() for f in PLUGINS['formatter'].keys()) if dataset not in self.config['datasets'].keys(): exception = { 'code': 'InvalidParameterValue', 'description': 'Invalid feature collection' } LOGGER.error(exception) return headers_, 400, json.dumps(exception, default=json_serial) format_ = check_format(args, headers) if format_ is not None and format_ not in formats: exception = { 'code': 'InvalidParameterValue', 'description': 'Invalid format' } LOGGER.error(exception) return headers_, 400, json.dumps(exception) LOGGER.debug('Processing query parameters') LOGGER.debug('Processing startindex parameter') try: startindex = int(args.get('startindex')) if startindex < 0: exception = { 'code': 'InvalidParameterValue', 'description': 'startindex value should be positive ' + 'or zero' } LOGGER.error(exception) return headers_, 400, json.dumps(exception) except (TypeError) as err: LOGGER.warning(err) startindex = 0 except ValueError as err: LOGGER.warning(err) exception = { 'code': 'InvalidParameterValue', 'description': 'startindex value should be an integer' } LOGGER.error(exception) return headers_, 400, json.dumps(exception) LOGGER.debug('Processing limit parameter') try: limit = int(args.get('limit')) # TODO: We should do more validation, against the min and max # allowed by the server configuration if limit <= 0: exception = { 'code': 'InvalidParameterValue', 'description': 'limit value should be strictly positive' } LOGGER.error(exception) return headers_, 400, json.dumps(exception) except TypeError as err: LOGGER.warning(err) limit = int(self.config['server']['limit']) except ValueError as err: LOGGER.warning(err) exception = { 'code': 'InvalidParameterValue', 'description': 'limit value should be an integer' } LOGGER.error(exception) return headers_, 400, json.dumps(exception) resulttype = args.get('resulttype') or 'results' LOGGER.debug('Processing bbox parameter') try: bbox = args.get('bbox').split(',') if len(bbox) != 4: exception = { 'code': 'InvalidParameterValue', 'description': 'bbox values should be minx,miny,maxx,maxy' } LOGGER.error(exception) return headers_, 400, json.dumps(exception) except AttributeError: bbox = [] try: bbox = [float(c) for c in bbox] except ValueError: exception = { 'code': 'InvalidParameterValue', 'description': 'bbox values must be numbers' } LOGGER.error(exception) return headers_, 400, json.dumps(exception) LOGGER.debug('Processing datetime parameter') # TODO: pass datetime to query as a `datetime` object # we would need to ensure partial dates work accordingly # as well as setting '..' 
# values to `None` so that underlying
# providers can just assume a `datetime.datetime` object
#
# NOTE: needs testing when passing partials from API to backend

datetime_ = args.get('datetime')
datetime_invalid = False

if (datetime_ is not None and
        'temporal' in self.config['datasets'][dataset]['extents']):

    te = self.config['datasets'][dataset]['extents']['temporal']

    if te['begin'].tzinfo is None:
        te['begin'] = te['begin'].replace(tzinfo=pytz.UTC)
    if te['end'].tzinfo is None:
        te['end'] = te['end'].replace(tzinfo=pytz.UTC)

    if '/' in datetime_:  # envelope
        LOGGER.debug('detected time range')
        LOGGER.debug('Validating time windows')
        datetime_begin, datetime_end = datetime_.split('/')
        if datetime_begin != '..':
            datetime_begin = dateparse(datetime_begin)
            if datetime_begin.tzinfo is None:
                datetime_begin = datetime_begin.replace(
                    tzinfo=pytz.UTC)

        if datetime_end != '..':
            datetime_end = dateparse(datetime_end)
            if datetime_end.tzinfo is None:
                datetime_end = datetime_end.replace(tzinfo=pytz.UTC)

        if te['begin'] is not None and datetime_begin != '..':
            if datetime_begin < te['begin']:
                datetime_invalid = True

        if te['end'] is not None and datetime_end != '..':
            if datetime_end > te['end']:
                datetime_invalid = True

    else:  # time instant
        datetime__ = dateparse(datetime_)
        if datetime__ != '..':
            if datetime__.tzinfo is None:
                datetime__ = datetime__.replace(tzinfo=pytz.UTC)
        LOGGER.debug('detected time instant')

        if te['begin'] is not None and datetime__ != '..':
            if datetime__ < te['begin']:
                datetime_invalid = True

        if te['end'] is not None and datetime__ != '..':
            if datetime__ > te['end']:
                datetime_invalid = True

if datetime_invalid:
    exception = {
        'code': 'InvalidParameterValue',
        'description': 'datetime parameter out of range'
    }
    LOGGER.error(exception)
    return headers_, 400, json.dumps(exception)

LOGGER.debug('Loading provider')
try:
    p = load_plugin('provider',
                    self.config['datasets'][dataset]['provider'])
except ProviderConnectionError:
    exception = {
        'code': 'NoApplicableCode',
        'description': 'connection error (check logs)'
    }
    LOGGER.error(exception)
    return headers_, 500, json.dumps(exception)
except ProviderQueryError:
    exception = {
        'code': 'NoApplicableCode',
        'description': 'query error (check logs)'
    }
    LOGGER.error(exception)
    return headers_, 500, json.dumps(exception)

LOGGER.debug('processing property parameters')
for k, v in args.items():
    if k not in reserved_fieldnames and k not in p.fields.keys():
        exception = {
            'code': 'InvalidParameterValue',
            'description': 'unknown query parameter'
        }
        LOGGER.error(exception)
        return headers_, 400, json.dumps(exception)
    elif k not in reserved_fieldnames and k in p.fields.keys():
        LOGGER.debug('Add property filter {}={}'.format(k, v))
        properties.append((k, v))

LOGGER.debug('processing sort parameter')
val = args.get('sortby')

if val is not None:
    sortby = []
    sorts = val.split(',')
    for s in sorts:
        if ':' in s:
            prop, order = s.split(':')
            if order not in ['A', 'D']:
                exception = {
                    'code': 'InvalidParameterValue',
                    'description': 'sort order should be A or D'
                }
                LOGGER.error(exception)
                return headers_, 400, json.dumps(exception)
            sortby.append({'property': prop, 'order': order})
        else:
            sortby.append({'property': s, 'order': 'A'})
    for s in sortby:
        if s['property'] not in p.fields.keys():
            exception = {
                'code': 'InvalidParameterValue',
                'description': 'bad sort property'
            }
            LOGGER.error(exception)
            return headers_, 400, json.dumps(exception)
else:
    sortby = []

LOGGER.debug('Querying provider')
LOGGER.debug('startindex: {}'.format(startindex))
LOGGER.debug('limit: {}'.format(limit))
LOGGER.debug('resulttype: {}'.format(resulttype))
LOGGER.debug('sortby: {}'.format(sortby))

try:
    content = p.query(startindex=startindex, limit=limit,
                      resulttype=resulttype, bbox=bbox,
                      datetime=datetime_, properties=properties,
                      sortby=sortby)
except ProviderConnectionError as err:
    exception = {
        'code': 'NoApplicableCode',
        'description': 'connection error (check logs)'
    }
    LOGGER.error(err)
    return headers_, 500, json.dumps(exception)
except ProviderQueryError as err:
    exception = {
        'code': 'NoApplicableCode',
        'description': 'query error (check logs)'
    }
    LOGGER.error(err)
    return headers_, 500, json.dumps(exception)
except ProviderGenericError as err:
    exception = {
        'code': 'NoApplicableCode',
        'description': 'generic error (check logs)'
    }
    LOGGER.error(err)
    return headers_, 500, json.dumps(exception)

serialized_query_params = ''
for k, v in args.items():
    if k not in ('f', 'startindex'):
        serialized_query_params += '&'
        serialized_query_params += urllib.parse.quote(k, safe='')
        serialized_query_params += '='
        serialized_query_params += urllib.parse.quote(str(v), safe=',')

content['links'] = [{
    'type': 'application/geo+json',
    'rel': 'self' if not format_ or format_ == 'json' else 'alternate',
    'title': 'This document as GeoJSON',
    'href': '{}/collections/{}/items?f=json{}'.format(
        self.config['server']['url'], dataset, serialized_query_params)
}, {
    'rel': 'self' if format_ == 'jsonld' else 'alternate',
    'type': 'application/ld+json',
    'title': 'This document as RDF (JSON-LD)',
    'href': '{}/collections/{}/items?f=jsonld{}'.format(
        self.config['server']['url'], dataset, serialized_query_params)
}, {
    'type': 'text/html',
    'rel': 'self' if format_ == 'html' else 'alternate',
    'title': 'This document as HTML',
    'href': '{}/collections/{}/items?f=html{}'.format(
        self.config['server']['url'], dataset, serialized_query_params)
}]

if startindex > 0:
    prev = max(0, startindex - limit)
    content['links'].append({
        'type': 'application/geo+json',
        'rel': 'prev',
        'title': 'items (prev)',
        'href': '{}/collections/{}/items?startindex={}{}'.format(
            self.config['server']['url'], dataset, prev,
            serialized_query_params)
    })

if len(content['features']) == limit:
    next_ = startindex + limit
    content['links'].append({
        'type': 'application/geo+json',
        'rel': 'next',
        'title': 'items (next)',
        'href': '{}/collections/{}/items?startindex={}{}'.format(
            self.config['server']['url'], dataset, next_,
            serialized_query_params)
    })

content['links'].append({
    'type': 'application/json',
    'title': self.config['datasets'][dataset]['title'],
    'rel': 'collection',
    'href': '{}/collections/{}'.format(self.config['server']['url'],
                                       dataset)
})

content['timeStamp'] = datetime.utcnow().strftime(
    '%Y-%m-%dT%H:%M:%S.%fZ')

if format_ == 'html':  # render
    headers_['Content-Type'] = 'text/html'

    # For constructing proper URIs to items
    if pathinfo:
        path_info = '/'.join([
            self.config['server']['url'].rstrip('/'),
            pathinfo.strip('/')
        ])
    else:
        path_info = '/'.join([
            self.config['server']['url'].rstrip('/'),
            headers.environ['PATH_INFO'].strip('/')
        ])

    content['items_path'] = path_info
    content['dataset_path'] = '/'.join(path_info.split('/')[:-1])
    content['collections_path'] = '/'.join(path_info.split('/')[:-2])
    content['startindex'] = startindex

    content = render_j2_template(self.config, 'items.html', content)
    return headers_, 200, content
elif format_ == 'csv':  # render
    formatter = load_plugin('formatter', {'name': 'CSV', 'geom': True})

    content = formatter.write(
        data=content,
        options={
            'provider_def': self.config['datasets'][dataset]['provider']
        })

    headers_['Content-Type'] = '{}; charset={}'.format(
        formatter.mimetype, self.config['server']['encoding'])

    cd = 'attachment; filename="{}.csv"'.format(dataset)
    headers_['Content-Disposition'] = cd

    return headers_, 200, content
elif format_ == 'jsonld':
    headers_['Content-Type'] = 'application/ld+json'
    content = geojson2geojsonld(self.config, content, dataset)
    return headers_, 200, content

return headers_, 200, json.dumps(content, default=json_serial)
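
# A minimal standalone sketch of the datetime-parameter handling above: an
# interval is written as 'start/end' with '..' for an open bound, an instant
# is a single timestamp, and naive values are treated as UTC. The helper
# names and the (extent_begin, extent_end) arguments are illustrative, not
# part of any particular API; extent bounds are assumed to be tz-aware or None.
from dateutil.parser import parse as dateparse
import pytz


def _to_utc(dt):
    return dt.replace(tzinfo=pytz.UTC) if dt.tzinfo is None else dt


def datetime_in_extent(datetime_, extent_begin, extent_end):
    """Return True if a datetime query parameter falls inside the extent."""
    if '/' in datetime_:  # envelope / interval
        begin, end = datetime_.split('/')
        if begin != '..' and extent_begin is not None:
            if _to_utc(dateparse(begin)) < extent_begin:
                return False
        if end != '..' and extent_end is not None:
            if _to_utc(dateparse(end)) > extent_end:
                return False
        return True
    instant = _to_utc(dateparse(datetime_))  # time instant
    if extent_begin is not None and instant < extent_begin:
        return False
    if extent_end is not None and instant > extent_end:
        return False
    return True


# e.g. datetime_in_extent('2018-01-01/..', extent_begin, extent_end)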
def handle_pull_request(self, payload, event):
    changes = []
    number = payload['number']
    refname = 'refs/pull/{}/{}'.format(number, self.pullrequest_ref)
    basename = payload['pull_request']['base']['ref']
    commits = payload['pull_request']['commits']
    title = payload['pull_request']['title']
    comments = payload['pull_request']['body']
    repo_full_name = payload['repository']['full_name']
    head_sha = payload['pull_request']['head']['sha']

    log.msg('Processing GitHub PR #{}'.format(number),
            logLevel=logging.DEBUG)

    head_msg = yield self._get_commit_msg(repo_full_name, head_sha)
    if self._has_skip(head_msg):
        log.msg("GitHub PR #{}, Ignoring: "
                "head commit message contains skip pattern".format(number))
        return ([], 'git')

    action = payload.get('action')
    if action not in ('opened', 'reopened', 'synchronize'):
        log.msg("GitHub PR #{} {}, ignoring".format(number, action))
        return (changes, 'git')

    properties = self.extractProperties(payload['pull_request'])
    properties.update({'event': event})
    properties.update({'basename': basename})
    change = {
        'revision': payload['pull_request']['head']['sha'],
        'when_timestamp': dateparse(payload['pull_request']['created_at']),
        'branch': refname,
        'revlink': payload['pull_request']['_links']['html']['href'],
        'repository': payload['repository']['html_url'],
        'project': payload['pull_request']['base']['repo']['full_name'],
        'category': 'pull',
        # TODO: Get author name based on login id using txgithub module
        'author': payload['sender']['login'],
        'comments': 'GitHub Pull Request #{0} ({1} commit{2})\n{3}\n{4}'.format(
            number, commits, 's' if commits != 1 else '', title, comments),
        'properties': properties,
    }
    if callable(self._codebase):
        change['codebase'] = self._codebase(payload)
    elif self._codebase is not None:
        change['codebase'] = self._codebase
    changes.append(change)

    log.msg("Received {} changes from GitHub PR #{}".format(
        len(changes), number))
    return (changes, 'git')
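
# The `_has_skip` helper referenced above is not shown in this excerpt. A
# plausible minimal equivalent (an assumption, not Buildbot's exact code)
# scans the head commit message for the usual "[ci skip]" / "[skip ci]"
# markers before a change is generated.
import re

_SKIP_RE = re.compile(r'\[\s*(ci skip|skip ci)\s*\]', re.IGNORECASE)


def has_skip(message):
    """Return True if a commit message asks CI to skip the build."""
    return bool(_SKIP_RE.search(message or ''))


# has_skip("Fix typo [skip ci]") -> True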
def _process_change(self, payload, user, repo, repo_url, project, event,
                    properties):
    """
    Consumes the JSON as a python object and actually starts the build.

    :arguments:
        payload
            Python Object that represents the JSON sent by GitHub Service
            Hook.
    """
    changes = []
    refname = payload['ref']

    # We only care about regular heads or tags
    match = re.match(r"^refs/(heads|tags)/(.+)$", refname)
    if not match:
        log.msg("Ignoring refname `{}': Not a branch".format(refname))
        return changes
    category = None  # None is the legacy category for when hook only supported push
    if match.group(1) == "tags":
        category = "tag"

    branch = match.group(2)
    if payload.get('deleted'):
        log.msg("Branch `{}' deleted, ignoring".format(branch))
        return changes

    # check skip pattern in commit message. e.g.: [ci skip] and [skip ci]
    head_msg = payload['head_commit'].get('message', '')
    if self._has_skip(head_msg):
        return changes

    commits = payload['commits']
    if payload.get('created'):
        commits = [payload['head_commit']]
    for commit in commits:
        files = []
        for kind in ('added', 'modified', 'removed'):
            files.extend(commit.get(kind, []))

        when_timestamp = dateparse(commit['timestamp'])

        log.msg("New revision: {}".format(commit['id'][:8]))

        change = {
            'author': '{} <{}>'.format(commit['author']['name'],
                                       commit['author']['email']),
            'committer': '{} <{}>'.format(commit['committer']['name'],
                                          commit['committer']['email']),
            'files': files,
            'comments': commit['message'],
            'revision': commit['id'],
            'when_timestamp': when_timestamp,
            'branch': branch,
            'revlink': commit['url'],
            'repository': repo_url,
            'project': project,
            'properties': {
                'github_distinct': commit.get('distinct', True),
                'event': event,
            },
            'category': category
        }
        # Update with any white-listed github event properties
        change['properties'].update(properties)

        if callable(self._codebase):
            change['codebase'] = self._codebase(payload)
        elif self._codebase is not None:
            change['codebase'] = self._codebase

        changes.append(change)

    return changes
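
# A small illustration of the refname handling above: only refs/heads/* and
# refs/tags/* are considered, and tags get the 'tag' category (None being the
# legacy "push" category). The function name is illustrative.
import re


def classify_ref(refname):
    """Return (category, branch) for a push ref, or None if it is ignored."""
    match = re.match(r"^refs/(heads|tags)/(.+)$", refname)
    if not match:
        return None
    category = "tag" if match.group(1) == "tags" else None
    return category, match.group(2)


# classify_ref('refs/heads/main')  -> (None, 'main')
# classify_ref('refs/tags/v1.2')   -> ('tag', 'v1.2')
# classify_ref('refs/pull/7/head') -> None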
def get_markers(db, client_name, clip_id, congress, chamber):
    api_url = API_PREFIX + client_name + '?type=marker&size=100000'
    data = '{"filter": { "term": { "video_id": %s}}, "sort": [{"offset":{"order":"asc"}}]}' % clip_id
    markers = query_api(db, api_url, data)
    clips = []
    bill_ids = []
    legislators = []
    legislator_ids = []
    roll_ids = []

    if markers:
        for m in markers:
            m_new = m['_source']
            c = {
                'offset': m_new['offset'],
                'events': [htmlentitydecode(m_new['name']).strip(), ],
                'time': m_new['datetime']
            }
            if m != markers[-1]:  # if it's not the last one
                c['duration'] = markers[markers.index(m) + 1]['_source']['offset'] - m_new['offset']

            year = dateparse(m_new['datetime']).year
            legis, bio_ids = python_utils.extract_legislators(c['events'][0], chamber, db)
            b = python_utils.extract_bills(c['events'][0], congress)
            r = python_utils.extract_rolls(c['events'][0], chamber, year)

            if legis:
                c['legislator_names'] = legis
                for l in legis:
                    if l not in legislators:
                        legislators.append(l)
            if bio_ids:
                c['legislator_ids'] = bio_ids
                for bi in bio_ids:
                    if bi not in legislator_ids:
                        legislator_ids.append(bi)
            if r:
                c['roll_ids'] = r
                for ro in r:
                    if ro not in roll_ids:
                        roll_ids.append(ro)
            if b:
                c['bill_ids'] = b
                for bill in b:
                    if bill not in bill_ids:
                        bill_ids.append(bill)

            clips.append(c)

        return (clips, bill_ids, legislators, legislator_ids, roll_ids)
    else:
        db.note('There are no markers for video id: %s' % clip_id)
        return (None, None, None, None, None)
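
# The duration bookkeeping used above (each clip runs until the next marker's
# offset, and the last clip until the end of the video) can be sketched in
# isolation like this; the function name is illustrative and the dict keys
# mirror the marker structure above.
def add_clip_durations(clips, video_duration):
    """Fill in 'duration' for clips sorted by ascending 'offset'."""
    for current, following in zip(clips, clips[1:]):
        current['duration'] = following['offset'] - current['offset']
    if clips:
        clips[-1]['duration'] = video_duration - clips[-1]['offset']
    return clips


# add_clip_durations([{'offset': 0}, {'offset': 90}], 600)
# -> [{'offset': 0, 'duration': 90}, {'offset': 90, 'duration': 510}]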
def get_videos(db, es, client_name, chamber, archive=False, captions=False):
    api_url = API_PREFIX + client_name + '?type=video'
    data = '{ "sort": [ {"datetime": {"order": "desc" }} ] }'

    if archive:
        api_url += '&size=100000'
    else:
        api_url += '&size=2'

    videos = query_api(db, api_url, data)

    if not videos:
        db.warning("Granicus API appears to be down", {'errors': PARSING_ERRORS})
        sys.exit()

    vcount = 0
    for vid in videos:
        v = vid['_source']
        legislative_day = dateparse(v['datetime'])
        video_id = chamber + '-' + str(int(timey.mktime(legislative_day.timetuple())))
        new_vid = db.get_or_initialize('videos', {'video_id': video_id})

        # initialize arrays and dicts so we don't have to worry about it later
        if not new_vid.has_key('clip_urls'):
            new_vid['clip_urls'] = {}
        if not new_vid.has_key('bill_ids'):
            new_vid['bill_ids'] = []
        if not new_vid.has_key('legislator_ids'):
            new_vid['legislator_ids'] = []
        if not new_vid.has_key('legislator_names'):
            new_vid['legislator_names'] = []
        if not new_vid.has_key('created_at'):
            new_vid['created_at'] = datetime.now()
        new_vid['updated_at'] = datetime.now()

        # video id, clips array, legislators array, bills array
        new_vid = try_key(v, 'id', 'clip_id', new_vid)
        new_vid = try_key(v, 'duration', 'duration', new_vid)
        new_vid = try_key(v, 'datetime', 'published_at', new_vid)
        # normalize timestamp format to RFC3339 in UTC
        new_vid['published_at'] = rfc3339(dateparse(new_vid['published_at']))
        new_vid['clip_urls'] = try_key(v, 'http', 'mp4', new_vid['clip_urls'])
        new_vid['clip_urls'] = try_key(v, 'hls', 'hls', new_vid['clip_urls'])
        new_vid['clip_urls'] = try_key(v, 'rtmp', 'rtmp', new_vid['clip_urls'])

        new_vid['legislative_day'] = legislative_day.strftime('%Y-%m-%d')
        new_vid['chamber'] = chamber
        new_vid['congress'] = python_utils.current_congress(legislative_day.year)

        if chamber == 'house':
            new_vid['clips'], new_vid['bill_ids'], new_vid['legislator_names'], new_vid['legislator_ids'], new_vid['roll_ids'] = get_markers(db, client_name, new_vid['clip_id'], new_vid['congress'], chamber)
        elif chamber == 'senate':
            new_vid['clips'], new_vid['bill_ids'], new_vid['legislator_names'], new_vid['legislator_ids'], new_vid['roll_ids'] = get_clips_for_senate(db, new_vid['clip_id'], new_vid['congress'], new_vid['duration'], dateparse(new_vid['published_at']).year)

        if new_vid['clips'] is None:
            print "Couldn't fetch information for video, skipping."
            continue

        # make sure the last clip has a duration
        if new_vid['clips'] and len(new_vid['clips']) > 0:
            new_vid['clips'][-1]['duration'] = new_vid['duration'] - new_vid['clips'][-1]['offset']

        if captions:
            new_vid['captions'], new_vid['caption_srt_file'] = get_captions(client_name, new_vid['clip_id'])

        db['videos'].save(new_vid)
        vcount += 1

        # index clip objects in elastic search
        if captions and new_vid.has_key('clips') and new_vid['clips'] is not None and len(new_vid['clips']) > 0:
            for c in new_vid['clips']:
                clip = {
                    'id': "%s-%s" % (new_vid['video_id'], new_vid['clips'].index(c)),
                    'video_id': new_vid['video_id'],
                    'video_clip_id': new_vid['clip_id'],
                    'offset': c['offset'],
                    'duration': c['duration'],
                    'legislative_day': new_vid['legislative_day'],
                    'published_at': new_vid['published_at'],
                    'clip_urls': new_vid['clip_urls']
                }
                clip = try_key(c, 'legislator_names', 'legislator_names', clip)
                clip = try_key(c, 'roll_ids', 'roll_ids', clip)
                clip = try_key(c, 'events', 'events', clip)
                clip = try_key(c, 'bill_ids', 'bill_ids', clip)
                clip = try_key(c, 'legislator_ids', 'legislator_ids', clip)
                if new_vid.has_key('caption_srt_file'):
                    clip['srt_link'] = new_vid['caption_srt_file']
                if new_vid.has_key('captions'):
                    # pass a boolean if this is the first clip
                    clip['captions'] = get_clip_captions(new_vid, c, c == new_vid['clips'][0])

                resp = es.save(clip, 'clips', clip['id'])

        print "Successfully processed %s" % new_vid['clip_id']

    es.connection.indices.refresh()
    db.success("Updated or created %s legislative days for %s video" % (vcount, client_name))
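
# The `rfc3339` helper used above ("normalize timestamp format to RFC3339 in
# UTC") is not defined in this excerpt. A plausible stand-in -- an assumption
# about its behaviour, not the project's actual code -- parses the timestamp,
# assumes UTC for naive values, and formats it as an RFC 3339 string.
from dateutil.parser import parse as dateparse
import pytz


def to_rfc3339(datestr):
    """Parse a timestamp string and render it as RFC 3339 in UTC."""
    dt = dateparse(datestr)
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=pytz.UTC)
    return dt.astimezone(pytz.UTC).strftime('%Y-%m-%dT%H:%M:%SZ')


# to_rfc3339('2014-06-03 14:00:00-04:00') -> '2014-06-03T18:00:00Z'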
def gen_new_date_str(datum, start_time_column, end_time_column):
    new_date = dateparse(datum[start_time_column])
    # print(new_date, datum[start_time_column], datum[end_time_column])
    return utils.date_to_str_1(new_date)
def _asPythonDate(datestr): return dateparse(datestr)
async def detect_video(video_file, date, name="Today", notes=""):
    cpus = multiprocessing.cpu_count()
    experiment_uuid = uuid4()
    experiment_day = dateparse(date)
    experiment_dir = os.path.join(config.experiment_dir, str(experiment_uuid))
    experiment = (experiment_uuid, experiment_day, name, "detection", notes)

    try:
        print("Creating data directory", experiment_dir)
        os.mkdir(experiment_dir)

        scaleby = 1
        w, h = int(2336 / scaleby), int(1729 / scaleby)

        # Reads the source video, outputs frames
        print("Launching Video Reader")
        video_reader = FFmpeg(
            video_file,
            "",
            (h, w, 1),
            "-ss 00:00:02.00 -t 00:00:00.50 -vf scale={}:{}".format(w, h),
            [],
            False,
            FrameData,
        )

        print("Launching Database processor")
        db_proc = DB_Processor(experiment_uuid, experiment_day, name, notes)

        print("Launching Entry processor")
        entry_proc = Entry(experiment_uuid)

        print("Launching Magic pixel processor")
        magic_proc = MagicPixel()

        print("Launching Rescale processor")
        rescale_proc = Rescaler()

        # Computes a background for a frame, outputs {"frame": frame, "bg": bg}
        print("Launching Background Modeler")
        bg_proc = BG(model="simpleMax", window_size=50, img_shape=(h, w, 1))

        # Takes a background and a frame, enhances frame to model foreground
        print("Launching Foreground Modeler")
        fg_proc = FG()

        # Takes a foreground frame, binarizes it
        print("Launching Binary Mask Processor")
        mask_proc = Binary("legacyLabeled")

        print("Launching Properties Processor")
        prop_proc = Properties()

        print("Launching Crop Processor")
        crop_proc = Crop_Processor()

        # A utility to view video pipeline output
        raw_player = RawPlayer()
        bg_player = BGPlayer()
        fg_player = FGPlayer()
        mask_player = MaskPlayer()
        crop_player = CropPlayer()
        meta_player = MetaPlayer()

        # A utility to clean up datagram resources
        cleaner = Cleaner()

        # Todo
        # print("Launching Crop Writer")
        # print("Launching Detection Video Writer")
        # print("Launching Particle Commmitter")
        # /todo

        EZ(
            video_reader,
            entry_proc,
            magic_proc,
            meta_player,
            rescale_proc,
            raw_player,
            bg_proc,
            fg_proc,
            mask_proc,
            cleaner,
        ).start().join()

    except Exception as e:
        print("Uh oh. Something went wrong")
        traceback.print_exc()
        # wq.push(None)
        if os.path.exists(experiment_dir):
            print("Removing files from", experiment_dir)
            shutil.rmtree(experiment_dir)
        else:
            pass
        # dbwriter.commit()
        # wq.push(None)
    finally:
        print("Fin.")
        return experiment_uuid
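
# The failure handling above (create a per-experiment working directory, tear
# it down if anything in the pipeline raises) is a common pattern. A
# stripped-down sketch of just that part, with config.experiment_dir replaced
# by an explicit base_dir argument; names are illustrative.
import os
import shutil
from uuid import uuid4


def with_experiment_dir(base_dir, run):
    """Create a UUID-named working dir, call run(path), clean up on failure."""
    experiment_dir = os.path.join(base_dir, str(uuid4()))
    os.mkdir(experiment_dir)
    try:
        return run(experiment_dir)
    except Exception:
        if os.path.exists(experiment_dir):
            shutil.rmtree(experiment_dir)
        raise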
def datestr_to_datetime(value):
    rp = value.rfind('(') + 1
    v = dateparse(value[rp:-1])
    print(value, "-->", v)  # spyder: test-skip
    return v
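
# The slicing above assumes the value ends with a parenthesised timestamp;
# everything between the last '(' and the trailing ')' is handed to dateparse.
# The sample string below is illustrative, not taken from the original data.
from dateutil.parser import parse as dateparse

sample = "Modified (2019-05-01T12:00:00)"
rp = sample.rfind('(') + 1
parsed = dateparse(sample[rp:-1])  # datetime(2019, 5, 1, 12, 0)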
exit()

lastFile = stateDir / "last.time"
subsJSONFile = confDir / "subscriptions.json"

# Overrule the time from which to download video if we've been asked to
# keep videos since a certain number of days ago.
if args.since is not None:
    sinceTimestamp = datetime.now() - relativedelta(days=int(args.since))
    ic("args.since is set", sinceTimestamp)
elif not lastFile.exists():
    lastFile.write_text(str(time()))
    sinceTimestamp = datetime.now() - relativedelta(days=7)
    ic("lastFile does not exist", str(time()), sinceTimestamp)
else:
    sinceTimestamp = dateparse(lastFile.read_text())
    ic("lastFile exists and is read", sinceTimestamp)

tmpJSON = json.loads(subsJSONFile.read_text())
ic(len(tmpJSON))
ic(tmpJSON[0])

baseURL = "https://www.youtube.com/feeds/videos.xml?channel_id="
feedURLs = [
    baseURL + item["snippet"]["resourceId"]["channelId"] for item in tmpJSON
]
ic(feedURLs[:10])

# Nothing is purged by default
if args.retain is not None:
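
# A condensed sketch of the cutoff logic above: prefer an explicit --since
# window, fall back to the stored last-run time, and default to seven days on
# first run. The function and argument names are assumptions, and the stored
# file is assumed to contain a timestamp string that dateparse can read.
from datetime import datetime
from dateutil.parser import parse as dateparse
from dateutil.relativedelta import relativedelta


def cutoff_time(since_days, last_file):
    """Return the datetime from which new videos should be fetched."""
    if since_days is not None:
        return datetime.now() - relativedelta(days=int(since_days))
    if not last_file.exists():
        return datetime.now() - relativedelta(days=7)
    return dateparse(last_file.read_text())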