def __init__(self, file_handle):
        """Create parser for open file.

        -- file_handle: a file object to read from.
        The file must be a CSV and have a 'time' column in ISO date format.
        """

        # auto detect the dialect from an initial sample
        #dialect = csv.Sniffer().sniff(file_handle.read(1000))
        #file_handle.seek(0)
        csv.DictReader.__init__(self, file_handle)#, dialect=dialect)

        #super(TimeParser, self).__init__(file_handle)#, dialect=dialect)

        self.current_row = dict((k.lower().strip(), v) for k, v in self.next().iteritems())
        self.next_row = dict((k.lower().strip(), v) for k, v in self.next().iteritems())

        self.current_time = dateparse(self.current_row['time'])
        self.next_time = dateparse(self.next_row['time'])

        # we need the list of cameras that have image columns here
        self.cameras = []

        # we need to filter down based on cameras...
        self.camera_sensors = {}
        # but pose is generic... so no need to list things
        self.pose_sensors = []
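
The docstring above expects a CSV whose 'time' column holds ISO dates, and these examples appear to use dateutil.parser.parse under the dateparse name (the fuzzy and yearfirst keywords used further down are dateutil features). A minimal standalone sketch of the same idea, with illustrative column names and in-memory data:

# Sketch: read a CSV with csv.DictReader, normalize headers, and parse the
# ISO 'time' column with dateutil (assumed to be what `dateparse` refers to).
import csv
import io
from dateutil.parser import parse as dateparse

sample = io.StringIO("Time,Value\n2021-03-01T12:00:00,1\n2021-03-01T12:05:00,2\n")
for raw in csv.DictReader(sample):
    row = {k.lower().strip(): v for k, v in raw.items()}
    when = dateparse(row['time'])        # ISO string -> datetime.datetime
    print(when, row['value'])
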
Example #2
def main(args):
    '''
    Generates statistics on all repositories
    '''
    start = dateparse(args.start_date)
    end = dateparse(args.end_date)
    log.info("Initializing %s", args.filename)
    with open(args.filename, 'w') as csvfile:
        csvwriter = csv.writer(csvfile, delimiter=',', quotechar="'", quoting=csv.QUOTE_MINIMAL)

        csvwriter.writerow([
            'repo',
            'issues created',
            'issues closed',
            'pull requests opened',
            'pull requests closed',
            'comments created',
            'commits',
            'releases'])

        for repo in REPOS:
            log.info("Getting report statistics on %s", repo)
            stats = (
                repo,
                count_issues_created(repo, start, end),
                count_issues_closed(repo, start, end),
                count_pull_requests_opened(repo, start, end),
                count_pull_requests_closed(repo, start, end),
                count_comments_created(repo, start, end),
                count_commits(repo, start, end),
                count_releases(repo, start, end)
            )
            csvwriter.writerow([str(i) for i in stats])
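
main() above hands the raw start_date/end_date strings straight to dateparse before querying each repo. A hedged sketch of how such arguments might be wired up (argument names mirror the code above; the values are illustrative):

# Sketch: free-form date strings from the command line become datetimes.
import argparse
from dateutil.parser import parse as dateparse

parser = argparse.ArgumentParser()
parser.add_argument('start_date')
parser.add_argument('end_date')
args = parser.parse_args(['2020-01-01', 'March 31, 2020'])   # example values

start = dateparse(args.start_date)   # datetime.datetime(2020, 1, 1, 0, 0)
end = dateparse(args.end_date)       # datetime.datetime(2020, 3, 31, 0, 0)
assert start < end
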
Example #3
def skede_from_skededict(data_dict, filing_number, header_row_id, is_amended, cd):
    data_dict['transaction_id'] = data_dict['transaction_id'][:20]
    data_dict['header_id'] = header_row_id
    data_dict['superceded_by_amendment'] = is_amended
    data_dict['filing_number'] = filing_number
        
    ## The switch from v.8 to v.8.1 added a 'dissemination date' though it kept the expenditure date.
    ## We now prefer the dissemination date, but fall back to the expenditure date if it's not available.
    ## The spec says that not having either is an error, so... 
    
    datefound = False
    try: 
        data_dict['expenditure_date_formatted'] = dateparse(data_dict['dissemination_date'])
        datefound = True
    except ValueError:
        pass
    except KeyError:
        pass
        
    if not datefound:
        try:
            data_dict['expenditure_date_formatted'] = dateparse(data_dict['expenditure_date'])
            datefound = True
        except ValueError:
            pass
        except KeyError:
            pass
    

    data_dict['expenditure_amount'] = validate_decimal(data_dict['expenditure_amount'])
    data_dict['calendar_y_t_d_per_election_office'] = validate_decimal(data_dict['calendar_y_t_d_per_election_office'])

    #model_instance = SkedE()
    
    cd.writerow('E', data_dict)
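
The comments above describe preferring 'dissemination_date' and falling back to 'expenditure_date' when it is missing or unparseable. A compact, hedged version of that fallback pattern (key names taken from the code above; this helper is illustrative, not the project's):

# Sketch: return the first field that parses as a date, or None.
from dateutil.parser import parse as dateparse

def first_parseable_date(data, keys=('dissemination_date', 'expenditure_date')):
    for key in keys:
        try:
            return dateparse(data[key])
        except (KeyError, ValueError, TypeError):
            continue
    return None

print(first_parseable_date({'expenditure_date': '20120917'}))   # 2012-09-17 00:00:00
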
Example #4
def parse(root, UNITS):
    value = root.find("./pod[@id='Result']").find('subpod').find('plaintext').text

    print value
    if value.startswith('~~ '):
        value = value.strip('~~ ')
    m = __number_re.search(value)

    if m:
        QUANTITY = float(m.group(1))
        UNIT = m.group(2).lower()

        if "trillion" in UNIT:
            QUANTITY *= pow(10, 12)
        elif "billion" in UNIT:
            QUANTITY *= pow(10, 9)
        elif "million" in UNIT:
            QUANTITY *= pow(10, 6)
        elif "thousand" in UNIT:
            QUANTITY *= pow(10, 3)

        elif "date" in UNITS:

            try:
                dt = dateparse(str(int(QUANTITY)))
                QUANTITY = (dt - datetime.datetime(1970, 1, 1)).total_seconds()

            except Exception as e:

                raise NameError("Exception")

        if not UNITS:
            if "$" in value:
                UNIT = "dollars"
        else:
            UNIT = UNITS

    else:
        # check if it is a date
        try:
            print value
            if len(value) == 4:
                epoch = datetime.datetime(1970, 1, 1)
                t = datetime.datetime(int(value), 1, 1)
                diff = t-epoch
                QUANTITY = diff.total_seconds()
                print QUANTITY
            else:
                print "Not 4 chars"
                print value
                dt = dateparse(value)
                QUANTITY = (dt - datetime.datetime(1970, 1, 1)).total_seconds()
            UNIT = "date"

        except:
            raise NameError('Could not parse!')

    print QUANTITY
    return (QUANTITY, UNIT)
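
The parse() function above converts parsed dates to seconds since the Unix epoch by subtracting datetime.datetime(1970, 1, 1). That conversion in isolation, as a hedged sketch:

# Sketch: epoch seconds for a naive parsed datetime.
import datetime
from dateutil.parser import parse as dateparse

dt = dateparse('2004-01-01')                      # naive datetime
epoch = datetime.datetime(1970, 1, 1)
print((dt - epoch).total_seconds())               # 1072915200.0
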
Example #5
def moin2atomentries(wikibase, outputdir, rewrite, pattern):
    wikibase_len = len(rewrite)
    if pattern: pattern = re.compile(pattern)
    #print (wikibase, outputdir, rewrite)
    req = urllib2.Request(wikibase, headers={'Accept': RDF_IMT})
    with closing(urllib2.urlopen(req)) as resp:
        feed = bindery.parse(resp)
    for item in feed.RDF.channel.items.Seq.li:
        uri = split_fragment(item.resource)[0]
        relative = uri[wikibase_len:]
        print >> sys.stderr, uri, relative
        if pattern and not pattern.match(relative):
            continue
        if rewrite:
            uri = uri.replace(rewrite, wikibase)
        req = urllib2.Request(uri, headers={'Accept': DOCBOOK_IMT})
        with closing(urllib2.urlopen(req)) as resp:
            page = bindery.parse(resp)
        entrydate = dateparse(unicode(page.article.articleinfo.revhistory.revision.date))
        if entrydate.tzinfo == None: entrydate = entrydate.replace(tzinfo=DEFAULT_TZ)
        output = os.path.join(outputdir, OUTPUTPATTERN%pathsegment(relative))
        if os.access(output, os.R_OK):
            lastrev = dateparse(unicode(bindery.parse(output).entry.updated))
            if lastrev.tzinfo == None: lastrev = lastrev.replace(tzinfo=DEFAULT_TZ)
            if (entrydate == lastrev):
                print >> sys.stderr, 'Not updated.  Skipped...'
                continue
        print >> sys.stderr, 'Writing to ', output
        with open(output, 'w') as output:
            handle_page(uri, page, outputdir, relative, output)
    return
Example #6
    def read(self, request, *args, **kwargs):
        allowed_entities = {
                'legislator': {
                    'model': NgramsByBioguide,
                    'field': 'bioguide_id',
                    },
                'state': {
                    'model': NgramsByState,
                    'field': 'state',
                    },
                'date': {
                    'model': NgramsByDate,
                    'field': 'date',
                    },
                'month': {
                    'model': NgramsByMonth,
                    'field': 'month',
                    },
                }

        n = request.GET.get('n', 1)
        try:
            n = int(n)
        except ValueError:
            return {'error': 'Invalid phrase length.', 'results': []}

        if n > 5:
            return {'error': 'Invalid phrase length.', 'results': []}

        entity = request.GET.get('entity_type', '')
        if entity not in allowed_entities.keys():
            return {'error': 'Invalid entity.', 'results': []}

        entity = allowed_entities[entity]
        val = request.GET.get('entity_value', '')
        if not val:
            return {'error': 'Invalid entity value.', 'results': []}
        sort = request.GET.get('sort', 'tfidf desc')
        if sort not in self.SORT_FIELDS.keys():
            return {'error': 'Invalid sort field.', 'results': []}

        per_page, offset = self.get_pagination(request)

        model = entity['model']
        field = entity['field']

        if field == 'date' and val:
            try:
                dateparse(val)
            except ValueError:
                return {'error': 'Invalid date.', 'results': []}

        query = {'n': n}
        if val:
            query.update({field: val})
        qset = model.objects.filter(**query)
        if sort:
            qset = qset.order_by(self.SORT_FIELDS[sort])
        return qset[offset:offset+per_page]
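
The read() handler above treats a failing dateparse() call as an invalid 'date' entity value and returns an error payload instead of querying. The same check as a hedged, standalone helper (the name is illustrative):

# Sketch: "does it parse?" as the validity test for a date parameter.
from dateutil.parser import parse as dateparse

def is_valid_date(value):
    try:
        dateparse(value)
    except (ValueError, OverflowError):
        return False
    return True

print(is_valid_date('2011-02-14'))   # True
print(is_valid_date('not-a-date'))   # False
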
Example #7
 def fromJSON(self, j):
     self.href = j['d:href']
     prop = j['d:propstat']['d:prop']
     self.name = prop['d:displayname']
     self.length = prop['d:getcontentlength']
     self.modified = dateparse(prop['d:getlastmodified'])
     self.created = dateparse(prop['d:creationdate'])
     return self
Example #8
def test_agent_last_update_increases(volttron_instance):
    agent = volttron_instance.build_agent()
    s = json.loads(agent.vip.health.get_status())
    dt = dateparse(s['last_updated'], fuzzy=True)
    agent.vip.health.set_status(STATUS_UNKNOWN, 'Unknown now!')
    gevent.sleep(1)
    s = json.loads(agent.vip.health.get_status())
    dt2 = dateparse(s['last_updated'], fuzzy=True)
    assert dt < dt2
Example #9
    def __read_tasks(self, tlist, parent=None):
        '''Internal function to recursively add tasks from an XML file to the treestore self.tasklist.'''
        for task in tlist.iterfind('./task'):
            #make a list from subelements and attributes to add to the treestore
            tasks = []
            tasks.append(int(task.get('priority')))
            tasks.append(int(task.find('pct').text))
            tasks.append(int(task.find('est').text))
            tasks.append(int(task.find('spent').text))
            est_begin_raw = task.find('est-begin').text
            if est_begin_raw is None:
                tasks.append("")
            else:
                tasks.append(dateparse(est_begin_raw))
            est_complete_raw = task.find('est-complete').text
            if est_complete_raw is None:
                tasks.append("")
            else:
                tasks.append(dateparse(est_complete_raw))
            act_begin_raw = task.find('act-begin').text
            if act_begin_raw is None:
                tasks.append("")
            else:
                tasks.append(dateparse(act_begin_raw))
            completed_raw = task.find('completed').text
            if completed_raw is None:
                tasks.append("")
            else:
                tasks.append(dateparse(completed_raw))
            due_raw = task.find('due').text
            if due_raw is None:
                tasks.append("")
            else:
                tasks.append(dateparse(due_raw))
            assigner_raw = task.find('assigner').text
            if assigner_raw is None: assigner_raw = ''
            tasks.append(assigner_raw)
            assignee_raw = task.find('assignee').text
            if assignee_raw is None: assignee_raw = ''
            tasks.append(assignee_raw)
            status_raw = task.find('status').text
            if status_raw is None: status_raw = ''
            tasks.append(status_raw)
            done = task.get('done') == "True"
            tasks.append(done)
            tasks.append(task.find('title').text)
            notes_raw = task.find('notes').text
            if notes_raw is None: notes_raw = ''
            tasks.append(notes_raw)
            tasks.append(task.find('due').get('useTime') == "True")
            tasks.append(not done) #inverse done
            tasks.append(False) #time track flag

            #append to store
            treeiter = self.tasklist.append(parent, tasks)
            self.__read_tasks(task.find('tasklist'), treeiter)
Example #10
def phrase_by_category(phrase, entity_type, start_date=None, end_date=None, mincount=1, sort='false'):
    '''finds occurences of a specific phrase by entity_type. expects
    dates in dd/mm/yyyy format. if 'start' and 'end' date are none, defaults
    to all time. the mincount argument controls whether counts are returned for all
    entities in the category, or only those with non-zero results.''' 
    args = {}

    if isinstance(start_date, basestring):
        start_date = dateparse(start_date).strftime('%d/%m/%Y')
    if isinstance(end_date, basestring):
        end_date = dateparse(end_date).strftime('%d/%m/%Y')
    
    # set up the faceting. many of these query args need to be set using a
    # string variable for the key since they contain periods. 

    args['facet'] = "true"
    if entity_type == 'legislator':
        field = 'speaker_bioguide'
    elif entity_type == 'state':
        field = 'speaker_state'
    elif entity_type == 'party':
        field = 'speaker_party'
    elif entity_type == 'bioguide':
        field = 'speaker_bioguide'
    else:
        raise NotImplementedError(entity_type)
    args['facet.field'] = field

    if mincount:
        args['facet.mincount'] = 1

    args['facet.sort'] = sort

    # default limit for # faceted fields returned is 100; we want to return for
    # all fields. 
    facet_limit = 'facet.limit'
    args[facet_limit] = -1

    q = '''text:"%s"''' % phrase
    if start_date and end_date:
        start = as_solr_date(start_date)
        end = as_solr_date(end_date)
        daterange = '''date:[%s TO %s]''' % (start, end)
        q = '''(%s AND %s)''' % (q, daterange)
    args['q'] = q 

    # return counts only, not the documents themselves
    args['rows'] = 0

    # do the api call
    json_resp = solr_api_call(args)

    # remove any cruft and format nicely. 
    return json_resp
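
phrase_by_category() above normalizes whatever date strings it receives to the dd/mm/yyyy form its docstring promises, via dateparse(...).strftime('%d/%m/%Y'). That step on its own, as a hedged sketch; note that an ambiguous input such as '02/03/2013' is read month-first unless dayfirst=True is passed to dateparse:

# Sketch: normalize assorted date strings to dd/mm/yyyy.
from dateutil.parser import parse as dateparse

for raw in ('2013-03-02', 'March 2, 2013'):
    print(dateparse(raw).strftime('%d/%m/%Y'))    # 02/03/2013 both times
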
Example #11
def generic_query(*args, **kwargs):
    q = []
    args = {}

    if 'date' in kwargs:
        date = dateparse(kwargs['date'])
        start = date.strftime('%d/%m/%Y')
        end = (date + datetime.timedelta(1)).strftime('%d/%m/%Y')
        q.append("date:[%s TO %s]" % (as_solr_date(start), as_solr_date(end)))

    elif 'start_date' in kwargs and 'end_date' in kwargs:
        start = dateparse(kwargs['start_date']).strftime('%d/%m/%Y')
        end = dateparse(kwargs['end_date']).strftime('%d/%m/%Y')
        q.append("date:[%s TO %s]" % (as_solr_date(start), as_solr_date(end)))

    if 'phrase' in kwargs:
        q.append('text:%s' % kwargs['phrase'])

    if 'congress' in kwargs:
        volumes = volume_lookup(kwargs['congress'], kwargs.get('session'))
        if not volumes:
            volumes = ['0', ]
        q.append('volume:(%s)' % ' OR '.join(volumes))

    if 'chamber' in kwargs:
        valid_chambers = ['house',
                          'senate',
                          'extensions', ]
        selected_chambers = []
        for chamber in kwargs['chamber'].lower().split('|'):
            if chamber in valid_chambers:
                selected_chambers.append(chamber)
        if selected_chambers:
            q.append('chamber:(%s)' % ' OR '.join([x.title() for x in selected_chambers]))

    entities = {'state': 'speaker_state',
                'party': 'speaker_party',
                'legislator': 'speaker',
                'bioguide': 'speaker_bioguide', 
                'cr_pages': 'pages',
                'volume': 'volume',
                }

    for k, v in entities.iteritems():
        if k in kwargs:
            q.append('%s:%s' % (v, kwargs[k]))

    if len(q):
        args['q'] = '(%s)' % ' AND '.join(q)
    else:
        args['q'] = '*:*'

    return args
Example #12
def get_noao_query_kwargs(**kwargs):
    """
    Get the NOAO download query.
    """
    # Some columns are required
    required = [
        'reference', 
        'release_date', 
        'start_date', 
        'filesize', 
        'dtpropid', 
        'md5sum'
    ]

    defaults = dict(tstart=dateparse('2012-11-01'), tstop=date.today(),
                    exptime=30,filters=('u','g','r','i','z','Y'),
                    limit=250000,expnum='%')

    defaults['columns'] = [
        'reference', 
        'release_date', 
        'start_date', 
        'date_obs', 
        'instrument', 
        'ra', 
        'dec', 
        'filter', 
        'exposure', 
        'obstype', 
        'proctype', 
        'dtacqnam AS original_file', 
        'reference AS archive_file',
        'filesize',
        ]

    for k,v in defaults.items():
        kwargs.setdefault(k,v)

    kwargs['columns'] = map(str.lower,kwargs['columns'])
    kwargs['columns'] += [c for c in required if c not in kwargs['columns']]

    kwargs['tstart'] = dateparse(str(kwargs['tstart']))
    kwargs['tstop']  = dateparse(str(kwargs['tstop']))

    if not isinstance(kwargs['columns'],basestring):
        kwargs['columns'] = ','.join(kwargs['columns'])
    if not isinstance(kwargs['filters'],basestring):
        kwargs['filters'] = ','.join(["'%s'"%f for f in kwargs['filters']])
    if isinstance(kwargs['expnum'],int):
        kwargs['expnum'] = '{expnum:08d}'.format(**kwargs)

    return kwargs
Example #13
 def handle(self, *args, **options):
     if not args:
         raise CommandError('Invalid arguments, must provide: %s' % self.args)
     
     filename = args[0]
     print "Processing file '%s' " % (filename)
     
     infile = open(filename, 'r')
     
     create_new_ads = options['create']
     if create_new_ads:
         print "Will create new ads when applicable"
     
     
     extra_rows = int(options['extra_rows'])
     if extra_rows:
         print "Disregarding first %s rows from csv file" % (extra_rows) 
         # Skip the first n lines before looking for headers, if requested.
         for i in range(0,extra_rows):
             next(infile)
     
     reader = None
     excel = options['excel']
     if excel:
         print "Trying to parse csv using excel dialect"
         reader = csv.DictReader(infile, dialect='excel')
     else:
         reader = csv.DictReader(infile)
         
     
     for row in reader:
         this_row_data = {}
         for key in default_options.keys():
             try:
                 this_row_data[key] = row[default_options[key]]
             except KeyError:
                 this_row_data[key] = None
         
         # get date objects for the dates entered. Assumes the dates don't need additional transformation.
         if this_row_data['contract_start_date']:
             this_row_data['contract_start_date'] = dateparse(this_row_data['contract_start_date'])
         if this_row_data['contract_end_date']:
             this_row_data['contract_end_date'] = dateparse(this_row_data['contract_end_date'])
         if this_row_data['upload_time']:
             this_row_data['upload_time'] = dateparse(this_row_data['upload_time']).date()
         
         
         
         #print this_row_data
         
         handle_row_data(this_row_data, create_new_ads)
Example #14
    def ps(self):
        all_containers = []

        # Collect the system containers
        for i in self.syscontainers.get_system_containers():
            container = i["Id"]
            inspect_stdout = util.check_output(["runc", "state", container])
            ret = json.loads(inspect_stdout)
            status = ret["status"]
            if not self.args.all and status != "running":
                continue

            image = i['Image']
            command = ""
            created = dateparse(ret['created']).strftime("%F %H:%M") # pylint: disable=no-member
            all_containers.append({"type" : "systemcontainer", "container" : container,
                                   "image" : image, "command" : command, "created" : created,
                                   "status" : status, "runtime" : "runc"})

        # Collect the docker containers
        for container in [x["Id"] for x in self.d.containers(all=self.args.all)]:
            ret = self._inspect_container(name=container)
            status = ret["State"]["Status"]
            image = ret['Config']['Image']
            command = u' '.join(ret['Config']['Cmd']) if ret['Config']['Cmd'] else ""
            created = dateparse(ret['Created']).strftime("%F %H:%M") # pylint: disable=no-member
            all_containers.append({"type" : "docker", "container" : container,
                                   "image" : image, "command" : command,
                                   "created" : created, "status" : status, "runtime" : "Docker"})

        if self.args.json:
            self.write_out(json.dumps(all_containers))
            return

        col_out = "{0:12} {1:20} {2:20} {3:16} {4:9} {5:10}"
        if self.args.heading:
            self.write_out(col_out.format("CONTAINER ID",
                                          "IMAGE",
                                          "COMMAND",
                                          "CREATED",
                                          "STATUS",
                                          "RUNTIME"))

        for container in all_containers:
            self.write_out(col_out.format(container["container"][0:12],
                                          container["image"][0:20],
                                          container["command"][0:20],
                                          container["created"][0:16],
                                          container["status"][0:9],
                                          container["runtime"][0:10]))
Example #15
def scrape_csv_file():
    url = 'http://www.fdic.gov/regulations/meetings/vlog.csv'
    reader = csv.DictReader(urllib2.urlopen(url))
    return [{
        'staff': parse_staff(row['Person Visited']),
        'meeting_time': dateparse(row['Date']),
        'disclosure_time': dateparse(row['Creation Date']),
        'organizations': parse_organizations(row['Affiliation']),
        'visitors': parse_visitors(row['Visitor']),
        'material_provided': row['Material Provided'],
        'description': row['Issues Discussed'].replace(':', '; ')
    }
    for row in reader
    if row['Status'].strip() == 'PUBLISH']
Example #16
def main(args):
    '''
    Generates a CSV for the issues for a specific repo
    '''
    log.info('Initializing %s', args.filename)
    start = dateparse(args.start_date)
    end = dateparse(args.end_date)
    log.info('Getting issues from github %s', args.repository)
    response = issues(args.repository, start, end)
    rows = [(str(r['number']), r['title'], r['state'], str(r['closed_at'])) for r in response]
    with open(args.filename, 'wb') as csvfile:
        writer = UnicodeWriter(csvfile)
        writer.writerow(('Issue Number', 'Title', 'Status', 'Closed At'))
        writer.writerows(rows)
    log.info('Done')
Example #17
def parseyears(relationship):
    try:
        startyear = int(relationship['commenced dating'])
        endyear = int(relationship['separated'])
    except ValueError:
        startyear = dateparse(relationship['commenced dating']).year
        endyear = dateparse(relationship['separated']).year
    except KeyError:
        startyear = None
        endyear = None
    finally:
        try:
            return startyear, endyear
        except UnboundLocalError:
            return startyear, None
Example #18
 def fromXML(self, j):
     [href, propstat] = j.getchildren()
     self.href = href.text
     [status, prop] = propstat.getchildren()
     for x in prop:
         tag = x.tag.replace(D, '')
         if   tag == 'displayname':
             self.name = x.text
         elif tag == 'getcontentlength':
             self.length = int(x.text)
         elif tag == 'getlastmodified':
             self.modified = dateparse(x.text)
         elif tag == 'creationdate':
             self.created = dateparse(x.text)
     return self
Example #19
 def rstrip_date(s):
     date = re.search(r'.+?(\([^(]+)$', s)
     if date is not None:
         possible_date = date.group(1).split(')')[0].strip(', \n(')
         if possible_date.isdigit():
             if re.search(r'^[12]\d\d\d$', possible_date) is not None:
                 return s[:date.start(1)].strip()
             else:
                 return s
         try:
             dateparse(possible_date)
             s = s[:date.start(1)].strip()
         except (ValueError, TypeError):
             pass
     return s
Example #20
def _format_bill(bill):
    bill = bill.copy()
    btype = bill_type_for(bill['bill_id'])
    bnumber = bill.get('number') or bill_number_for(bill['bill_id'])
    bdate = bill.get('legislative_day') or bill.get('last_action_at')
    try:
        bdate = dateparse(bdate).strftime('%B %e')
    except:
        bdate = 'unknown date'
    title = (bill.get('popular_title') or
             bill.get('short_title') or
             bill.get('official_title') or '')
    ctx = bill.get('context', [])
    bill['summary'] = bill.get('summary') or ''
    bill_context = {
        'date': bdate,
        'chamber': bill['chamber'],
        'bill_type': btype,
        'bill_number': bnumber,
        'bill_title': title.encode('ascii', 'ignore'),
        'bill_description': '\n'.join(ctx).encode('ascii', 'ignore'),
    }
    if len(bill.get('actions', [])):
        bill_context.update(bill_status="%s on %s" % (bill['last_action'].get('text'),
                                                      dateparse(bill['last_action'].get('acted_at')).strftime('%B %e, %Y')))
    else:
        bill_context.update(bill_status='No known actions taken yet.')

    sponsor = bill.get('sponsor')
    if sponsor:
        sponsor_party = sponsor.get('party')
        sponsor_state = sponsor.get('state')
        if sponsor_party and sponsor_state:
            bill_context.update(sponsor="Sponsored by: %s, %s, %s" % (_format_legislator(sponsor)['fullname'],
                                                                  party_for(sponsor['party']),
                                                                  state_for(sponsor['state'])))
        else:
            bill_context.update(sponsor="Sponsored by: %s" % _format_legislator(sponsor)['fullname'])

    cosponsors = bill.get('cosponsors', [])
    if len(cosponsors):
        bill_context.update(cosponsors="Cosponsored by: %s" %
            ', '.join(["%s, %s, %s" % (_format_legislator(cs)['fullname'],
                                       party_for(cs['party']),
                                       state_for(cs['state'])) for cs in cosponsors]))

    bill.update(bill_context=bill_context)
    return bill
Example #21
def discover(s):
    if not s:
        return null

    for f in string_coercions:
        try:
            return discover(f(s))
        except:
            pass

    # don't let dateutil parse things like sunday, monday etc into dates
    if s.isalpha():
        return string

    try:
        d = dateparse(s)
        if not d.time():
            return date_
        if not d.date():
            return time_
        return datetime_
    except:
        pass

    return string
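
The comment in discover() above explains the s.isalpha() guard: dateutil happily resolves bare weekday names against a default date instead of raising, so 'sunday' would otherwise be classified as a date. A hedged demonstration (the exact date returned depends on the default supplied, or on today's date if none is given):

# Sketch: why discover() filters alphabetic strings before calling dateparse.
import datetime
from dateutil.parser import parse as dateparse

default = datetime.datetime(2020, 1, 1)            # a Wednesday
parsed = dateparse('sunday', default=default)
print(parsed)                                      # a nearby Sunday, not an error
print(parsed.weekday())                            # 6
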
Example #22
def skeda_from_f133(data_dict, filing_number, header_row_id, is_amended, cd):
    data_dict['transaction_id'] = data_dict['transaction_id'][:20]
    data_dict['header_id'] = header_row_id
    data_dict['superceded_by_amendment'] = is_amended
    data_dict['filing_number'] = filing_number
    
    # map refund to contributions
    data_dict['contribution_amount'] = data_dict['refund_amount']
    data_dict['contribution_date'] = data_dict['refund_date']    
    
    # so logging doesn't complain about unexpected value
    del data_dict['refund_date']
    del data_dict['refund_amount']
    
    data_dict['contribution_amount'] = validate_decimal(data_dict['contribution_amount'])
    
    # flip signs if this is positive. 
    if data_dict['contribution_amount']  > 0:
        data_dict['contribution_amount'] = 0-data_dict['contribution_amount']

    if data_dict['contribution_date']:
        try:
            data_dict['contribution_date_formatted'] = dateparse(data_dict['contribution_date'])
        except ValueError:
            # if we can't parse the date, just ignore it. 
            pass

    cd.writerow('A', data_dict)
Example #23
    def get_context_data(self, **kwargs):
        context = super(SingleDayView, self).get_context_data(**kwargs)

        session_list = self.get_queryset()
        for sess in list(session_list):
            sess.start_time = sess.start_time.astimezone(timezone.get_current_timezone())

        locations = Location.objects.with_sessions().filter(event=self.event,
                                                            sessions__in=context['session_list']
                                                            ).distinct()
        try:
            lunchtime = self.get_queryset().filter(
                title__istartswith='lunch')[0].start_time.astimezone(timezone.get_current_timezone())
        except IndexError:
            lunchtime = None

        timeslots = self.request.GET.get('timeslots', '').split(',')
        timeslots = [dateparse(time).time() for time in timeslots]

        context['session_list'] = session_list
        context['event'] = self.event
        context['locations'] = locations
        context['timeslots'] = timeslots
        context['lunchtime'] = lunchtime
        context['now'] = timezone.now().astimezone(timezone.get_current_timezone())
        context['now_minus_session_length'] = context['now'] - context['event'].session_length
        return context
Example #24
def _process_change(payload, user, repo, repo_url, project, event,
                    codebase=None):
    """
    Consumes the JSON as a python object and actually starts the build.

    :arguments:
        payload
            Python Object that represents the JSON sent by GitHub Service
            Hook.
    """
    changes = []
    refname = payload['ref']

    # We only care about regular heads or tags
    match = re.match(r"^refs/(heads|tags)/(.+)$", refname)
    if not match:
        log.msg("Ignoring refname `%s': Not a branch" % refname)
        return changes

    branch = match.group(2)
    if payload.get('deleted'):
        log.msg("Branch `%s' deleted, ignoring" % branch)
        return changes

    for commit in payload['commits']:
        if not commit.get('distinct', True):
            log.msg('Commit `%s` is a non-distinct commit, ignoring...' %
                    (commit['id'],))
            continue

        files = []
        for kind in ('added', 'modified', 'removed'):
            files.extend(commit.get(kind, []))

        when_timestamp = dateparse(commit['timestamp'])

        log.msg("New revision: %s" % commit['id'][:8])

        change = {
            'author': '%s <%s>' % (commit['author']['name'],
                                   commit['author']['email']),
            'files': files,
            'comments': commit['message'],
            'revision': commit['id'],
            'when_timestamp': when_timestamp,
            'branch': branch,
            'revlink': commit['url'],
            'repository': repo_url,
            'project': project,
            'properties': {
                'event': event,
            },
        }

        if codebase is not None:
            change['codebase'] = codebase

        changes.append(change)

    return changes
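
The _process_change() hook above passes each commit's 'timestamp' field from the webhook payload to dateparse; those are ISO 8601 strings with a UTC offset, which dateutil turns into timezone-aware datetimes. A hedged illustration with a made-up timestamp:

# Sketch: webhook-style ISO 8601 timestamps parse to aware datetimes.
from dateutil.parser import parse as dateparse

when = dateparse('2015-12-25T19:33:20+01:00')
print(when.isoformat())    # 2015-12-25T19:33:20+01:00
print(when.utcoffset())    # 1:00:00
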
Example #25
def readfile(filelocation):
    fh = open(filelocation, 'r')
    reader = csv.DictReader(fh)
    count = 0
    for newstyle_row in reader:
        row = transform_column_headers(newstyle_row)
        try:
            thiscom = f1filer.objects.get(cmte_id=row['cmte_id'])
        except f1filer.DoesNotExist:
            print "Creating %s %s" % (row['cmte_id'], row['cmte_nm'])
            # first create the f1filer object:
            row['cycle'] = CURRENT_CYCLE
            try:
                row['receipt_dt'] = dateparse(row['receipt_dt'])
            except:
                print "can't parse original receipt date='%s', skipping" % (row['receipt_dt'])
                continue
            
            try:
                del row[None]
            except KeyError:
                pass
                
            print row
            thisf1 = f1filer(**row)
            thisf1.save()
            
            ## if we are creating a new f1, check if it's a committee and if not, create one. 
            make_committee_from_f1filer(row['cmte_id'], row['cycle'])
Example #26
    def handle(self, *args, **options):
        event = Event.objects.get(pk=int(options.get('event_id')))
        timeslot = options.get('timeslot')
        skipdelta = options.get('skipdelta')

        if skipdelta:
            skipdelta = datetime.timedelta(seconds=int(options.get('skipdelta')))
        else:
            skipdelta = None

        if timeslot == 'next':
            sessions = Session.objects.next().filter(event=event)
            timeslot = sessions[0].start_time
        else:
            timeslot = dateparse(timeslot).replace(tzinfo=timezone.get_current_timezone())

        if skipdelta is not None and timezone.now() + skipdelta < timeslot:
            print 'Sessions are too far in the future, aborting.'
            return

        try:
            tweet = SessionBlockTweet.objects.get(event=event, timeslot=timeslot,
                                                  previous=None, sent_at=None)
        except SessionBlockTweet.DoesNotExist:
            print 'No tweets have been generated for this timeslot, or tweets have been sent already. Run ./manage.py generatetweets --event-id=%s --timeslot=%s and try again' % (event.id, timeslot.isoformat())
            return

        tweet.send()
        print 'Sent %d tweets for block %s.' % (tweet.total, timeslot.isoformat())
Example #27
def format_tweet(tweet, tweeter_db_id):
    t = dict( text = tweet['text'],
              created_at = dateparse(tweet['created_at']),
              favorite_counts = tweet['favorite_count'],
              retweet_counts = tweet['retweet_count'],
              tweeter_id = tweeter_db_id )
    return Tweet(**t)
Example #28
 def version_dates(self):
     ret = OrderedDict()
     for release, info in self.release_info:
         if info:
             upload_time = dateparse(info[0]['upload_time'])
             ret[release] = upload_time
     return ret
Example #29
    def handle_pull_request(self, payload):
        changes = []
        number = payload["number"]
        commits = payload["pull_request"]["commits"]

        log.msg("Processing GitHub PR #%d" % number, logLevel=logging.DEBUG)

        action = payload.get("action")
        if action not in ("opened", "reopened", "synchronize"):
            log.msg("GitHub PR #%d %s, ignoring" % (number, action))
            return changes, "git"

        change = {
            "revision": payload["pull_request"]["head"]["sha"],
            "when_timestamp": dateparse(payload["pull_request"]["created_at"]),
            "branch": payload["pull_request"]["head"]["ref"],
            "revlink": payload["pull_request"]["_links"]["html"]["href"],
            "repository": payload["repository"]["clone_url"],
            "category": "pull",
            # TODO: Get author name based on login id using txgithub module
            "author": payload["sender"]["login"],
            "comments": "GitHub Pull Request #%d (%d commit%s)" % (number, commits, "s" if commits != 1 else ""),
        }

        if callable(self._codebase):
            change["codebase"] = self._codebase(payload)
        elif self._codebase is not None:
            change["codebase"] = self._codebase

        changes.append(change)

        log.msg("Received %d changes from GitHub PR #%d" % (len(changes), number))
        return changes, "git"
Example #30
  def create_action_details_from_trace (self, project, labels, time_ranges,
      threads, trace_info, extended_info):

    if (type(labels) is not list):
      return []

    results = {}
    first_paint_time = None
    dom_content_loaded_time = None
    load_time = None
    create_action_if_needed = (len(labels) == 0)
    to_save = []

    def sum(l):
      total = 0
      for v in l:
        total += v
      return total

    # Default the trace date to the time the blob was uploaded.
    trace_date = trace_info.date
    if ('datetime' in extended_info):
      try:
        # Get the date, parse it, and set back to UTC for AppEngine
        trace_date = dateparse(extended_info['datetime'])
        trace_date = trace_date.replace(tzinfo=None) - trace_date.utcoffset()
      except Exception, e:
        # Fail nicely
        trace_date = trace_info.date
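
The fragment above parses extended_info['datetime'] and then normalizes the aware result to a naive UTC value for AppEngine. That normalization as a hedged, standalone sketch, alongside the equivalent stdlib astimezone() route:

# Sketch: two ways to reduce an offset-aware datetime to naive UTC.
import datetime
from dateutil.parser import parse as dateparse

aware = dateparse('2016-06-01T12:00:00-07:00')

naive_utc = aware.replace(tzinfo=None) - aware.utcoffset()
also_utc = aware.astimezone(datetime.timezone.utc).replace(tzinfo=None)

assert naive_utc == also_utc        # both are 2016-06-01 19:00:00
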
Example #31
    def handle_release(self, payload, event):

        repo_url = payload['repository']['html_url']
        release = payload['release']['tag_name']
        project = payload['repository']['full_name']
        title = u'{} [{} release {}]'.format(payload['release']['name'],
                                             project, release)
        comment = payload['release']['body']
        change = {
            'author':
            u'{} <{}>'.format(payload['release']['author']['full_name'],
                              payload['release']['author']['email']),
            'branch':
            release,
            'category':
            'release',
            'comments':
            u'{}\n{}'.format(title, comment),
            'revision':
            release,
            'when_timestamp':
            dateparse(payload['release']['created_at']),
            'repository':
            payload['repository']['clone_url'],
            'project':
            project,
            'properties': {
                'action': payload['action'],
                'draft': payload['release']['draft'],
                'prerelease': payload['release']['prerelease'],
                'event': event,
            },
            'revlink':
            u'{}/src/{}'.format(repo_url, release),
        }

        return [
            change,
        ]
Example #32
    def get_commit(self, commit):
        files = []
        for kind in ('added', 'modified', 'removed'):
            files.extend(commit.get(kind, []))

        change = {
            'author':
            u'{} <{}>'.format(commit['author']['name'],
                              commit['author']['email']),
            'files':
            files,
            'comments':
            commit['message'],
            'revision':
            commit['id'],
            'when_timestamp':
            dateparse(commit['timestamp']),
            'revlink':
            commit['url'],
        }

        return change
Example #33
def day(args, *extra, **kwargs):
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'username',
        help='The MyFitnessPal username for which to delete a stored password.'
    )
    parser.add_argument('date',
                        nargs='?',
                        default=datetime.now().strftime('%Y-%m-%d'),
                        type=lambda datestr: dateparse(datestr).date(),
                        help=u'The date for which to display information.')
    args = parser.parse_args(extra)

    password = get_password_from_keyring_or_interactive(args.username)
    client = Client(args.username, password)
    day = client.get_date(args.date)

    t = Terminal()

    print(t.blue(args.date.strftime('%Y-%m-%d')))
    for meal in day.meals:
        print(t.bold(meal.name.title()))
        for entry in meal.entries:
            print(u'* {entry.name}'.format(entry=entry))
            print(
                t.italic_bright_black(
                    u'  {entry.nutrition_information}'.format(entry=entry)))
        print(u'')

    print(t.bold("Totals"))
    for key, value in day.totals.items():
        print(u'{key}: {value}'.format(
            key=key.title(),
            value=value,
        ))
    print(u'Water: {amount}'.format(amount=day.water))
    if day.notes:
        print(t.italic(day.notes))
Example #34
def day(args, *extra, **kwargs):
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "username",
        help="The MyFitnessPal username for which to delete a stored password.",
    )
    parser.add_argument(
        "date",
        nargs="?",
        default=datetime.now().strftime("%Y-%m-%d"),
        type=lambda datestr: dateparse(datestr).date(),
        help="The date for which to display information.",
    )
    args = parser.parse_args(extra)

    password = get_password_from_keyring_or_interactive(args.username)
    client = Client(args.username, password)
    day = client.get_date(args.date)

    date_str = args.date.strftime("%Y-%m-%d")
    print(f"[blue]{date_str}[/blue]")
    for meal in day.meals:
        print(f"[bold]{meal.name.title()}[/bold]")
        for entry in meal.entries:
            print(f"* {entry.name}")
            print(f"  [italic bright_black]{entry.nutrition_information}"
                  f"[/italic bright_black]")
        print("")

    print("[bold]Totals[/bold]")
    for key, value in day.totals.items():
        print("{key}: {value}".format(
            key=key.title(),
            value=value,
        ))
    print(f"Water: {day.water}")
    if day.notes:
        print(f"[italic]{day.notes}[/italic]")
Example #35
    def getChanges(self, request):
        """Catch a POST request from BitBucket and start a build process

        Check the URL below if you require more information about payload
        https://confluence.atlassian.com/display/BITBUCKET/POST+Service+Management

        :param request: the http request Twisted object
        :param options: additional options
        """

        event_type = request.getHeader(_HEADER_EVENT)
        event_type = bytes2unicode(event_type)
        payload = json.loads(bytes2unicode(request.args[b'payload'][0]))
        repo_url = '{}{}'.format(
            payload['canon_url'], payload['repository']['absolute_url'])
        project = request.args.get(b'project', [b''])[0]
        project = bytes2unicode(project)

        changes = []
        for commit in payload['commits']:
            changes.append({
                'author': commit['raw_author'],
                'files': [f['file'] for f in commit['files']],
                'comments': commit['message'],
                'revision': commit['raw_node'],
                'when_timestamp': dateparse(commit['utctimestamp']),
                'branch': commit['branch'],
                'revlink': '{}commits/{}'.format(repo_url, commit['raw_node']),
                'repository': repo_url,
                'project': project,
                'properties': {
                    'event': event_type,
                },
            })
            log.msg('New revision: {}'.format(commit['node']))

        log.msg('Received {} changes from bitbucket'.format(len(changes)))
        return (changes, payload['repository']['scm'])
Example #36
    def get_initial_queryset(self):
        calls = PhoneCall.objects.get_phone_calls_for_user(
            self.request.user).prefetch_related('content_object')

        filter_kwargs = {}

        if self.request.GET.get('call_date'):
            filter_kwargs['call_date'] = dateparse(
                self.request.GET.get('call_date')).strftime('%Y-%m-%d')

        if self.request.GET.get('city'):
            filter_kwargs['city'] = self.request.GET.get('city')

        if self.request.GET.get('state'):
            filter_kwargs['state'] = self.request.GET.get('state')

        if self.request.GET.get('zip'):
            filter_kwargs['zip'] = self.request.GET.get('zip')

        if self.request.GET.get('handraises'):
            filter_kwargs['handraises__isnull'] = False

        return calls.filter(**filter_kwargs)
Example #37
def skeda_from_skedadict(data_dict, filing_number, header_row_id, is_amended,
                         cd):
    """ We can either pass the header row in or not; if not, look it up."""

    data_dict['transaction_id'] = data_dict['transaction_id'][:20]
    data_dict['header_id'] = header_row_id
    data_dict['superceded_by_amendment'] = is_amended
    data_dict['filing_number'] = filing_number

    if data_dict['contribution_date']:
        try:
            data_dict['contribution_date_formatted'] = dateparse(
                data_dict['contribution_date'])
        except ValueError:
            # if we can't parse the date, just ignore it.
            pass

    data_dict['contribution_amount'] = validate_decimal(
        data_dict['contribution_amount'])
    data_dict['contribution_aggregate'] = validate_decimal(
        data_dict['contribution_aggregate'])

    cd.writerow('A', data_dict)
Example #38
def parse_states(fpath):

    state_map = {'H': 'Home', 'M': 'Maze', 'St': 'LDstim',
                 'O': 'Old open field', 'Oc': 'Old open field w/ curtain',
                 'N': 'New open field', 'Ns': 'New open field w/ LDstim',
                 '5hS': '5 hrs of 1 sec every 5 sec', 'L': 'Large open field',
                 'X': 'Extra large open field',
                 'Nc': 'New open field w/ curtain'}

    subject_path, fname = os.path.split(fpath)
    fpath_base, fname = os.path.split(subject_path)
    subject_id, date_text = fname.split('-')
    session_date = dateparse(date_text, yearfirst=True)
    mouse_num = ''.join(filter(str.isdigit, subject_id))
    exp_sheet_path = os.path.join(subject_path, 'YM' + mouse_num + ' exp_sheet.xlsx')
    df = pd.read_excel(exp_sheet_path, sheet_name=1)
    state_ids = df[df['implanted'] == session_date].values[0, 2:15]

    statepath = os.path.join(fpath, 'EEGlength')
    state_times = pd.read_csv(statepath).values
    states = [state_map[x] for x, _ in zip(state_ids, state_times)]

    return states, state_times
Example #39
 def _check_row(self,row):
   """
   Checks a row of cells against the current rules in self.row_rules
   
   Args:
     row:      List of cells
     
   Returns:
     True/False
   """
   for rule in self.row_rules:
     try:
       if rule['val_type'] == 'date':
         v=float(dateparse(row[rule['col']-1]).strftime('%s'))
       elif rule['val_type'] == 'number':
         v=float(row[rule['col']-1])
       else:
         v=str(row[rule['col']-1])
       if not rule['op'](v):
         return False
     except:
       return False
   return True
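
_check_row() above turns parsed dates into epoch floats with strftime('%s'); that format code is a platform-specific C-library extension rather than something Python guarantees, and it is unavailable on Windows. A hedged sketch of a portable equivalent (the helper name is illustrative, and naive values are pinned to UTC here, whereas '%s' would use local time):

# Sketch: portable replacement for float(dt.strftime('%s')).
import datetime
from dateutil.parser import parse as dateparse

def epoch_seconds(cell):
    dt = dateparse(cell)
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=datetime.timezone.utc)   # '%s' would assume local time
    return dt.timestamp()

print(epoch_seconds('1970-01-02'))    # 86400.0
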
Example #40
def parse_url_for_timestamp(url):

    parsed_url = urlparse.urlparse(url)
    qs = urlparse.parse_qs(parsed_url.query)

    raw_time = None
    if 't' in qs:
        raw_time = qs['t'][0]
    elif 't=' in parsed_url.fragment:
        raw_time = parsed_url.fragment.split('=')[1]  #assume only one frag

    result = (0, 0)
    if raw_time:
        try:
            raw = dateparse(raw_time)
            result = raw.minute, raw.second
        except Exception as e:
            print 'date error:', e
            result = (0, 0)
        else:
            print 'resulting time', result

    return result
Example #41
def read_mail_file(filename):
    """parse and extract features from the given email file"""

    headers = email.parser.Parser().parse(open(filename, "r"),
                                          headersonly=True)
    subject = headers['subject']
    words = set(subject.split())

    sender = headers['from']

    # NB: split-n-strip is not the most robust way to parse emails out of a list. We're
    #     hoping that the email-normalization that was done to this dataset makes up for that.
    raw_recipients = headers.get_all('to', []) + \
                     headers.get_all('cc', []) + \
                     headers.get_all('bcc', [])
    recipients = set(address.strip() for field in raw_recipients
                     for address in field.split(","))

    msgdate = dateparse(headers.get('date', a_bad_time))
    day = msgdate.strftime("%Y-%m-%d")

    return EmailRecord(sender, recipients, msgdate, day, subject, words,
                       headers.defects, filename)
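
The comment in read_mail_file() above concedes that splitting header values on commas and stripping is not a robust way to pull addresses out of To/Cc/Bcc fields. One stdlib alternative, as a hedged sketch, is email.utils.getaddresses, which copes with quoted display names that themselves contain commas:

# Sketch: stdlib address extraction instead of split-and-strip.
from email.utils import getaddresses

raw_recipients = ['"Doe, Jane" <jane@example.com>, bob@example.com']
recipients = {addr for _, addr in getaddresses(raw_recipients)}
print(recipients)    # {'jane@example.com', 'bob@example.com'} (set, order varies)
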
Example #42
def discover(s):
    if not s:
        return null

    for f in string_coercions:
        try:
            return discover(f(s))
        except (ValueError, KeyError):
            pass

    # don't let dateutil parse things like sunday, monday etc into dates
    if s.isalpha() or s.isspace():
        return string

    try:
        d = dateparse(s)
    except (ValueError,
            OverflowError):  # OverflowError for stuff like 'INF...'
        pass
    else:
        return date_ if is_zero_time(d.time()) else datetime_

    return string
Example #43
    def handle_pull_request(self, payload):
        changes = []
        number = payload['number']
        refname = 'refs/pull/%d/merge' % (number,)
        commits = payload['pull_request']['commits']

        log.msg('Processing GitHub PR #%d' % number, logLevel=logging.DEBUG)

        action = payload.get('action')
        if action not in ('opened', 'reopened', 'synchronize'):
            log.msg("GitHub PR #%d %s, ignoring" % (number, action))
            return changes, 'git'

        change = {
            'revision': payload['pull_request']['head']['sha'],
            'when_timestamp': dateparse(payload['pull_request']['created_at']),
            'branch': refname,
            'revlink': payload['pull_request']['_links']['html']['href'],
            'repository': payload['repository']['html_url'],
            'project': payload['pull_request']['base']['repo']['full_name'],
            'category': 'pull',
            # TODO: Get author name based on login id using txgithub module
            'author': payload['sender']['login'],
            'comments': 'GitHub Pull Request #%d (%d commit%s)' % (
                number, commits, 's' if commits != 1 else ''),
        }

        if callable(self._codebase):
            change['codebase'] = self._codebase(payload)
        elif self._codebase is not None:
            change['codebase'] = self._codebase

        changes.append(change)

        log.msg("Received %d changes from GitHub PR #%d" % (
            len(changes), number))
        return changes, 'git'
Example #44
        def get_hhapps_client(name):
            access_token = None
            expiry_date = None

            user = current_user
            if user.is_authenticated:
                access_token = user.token.get('access-token', None)
                expiry_date = dateparse(user.token.get('expiry-date'))
            else:
                return None

            schemas = app.config.get('HHCLIENT_APPS_SCHEMAS', {})
            hhapps = app.config.get('HHCLIENT_APPS_ATTRIBUTES', {})

            now = datetime.datetime.utcnow()
            if expiry_date and now > expiry_date:
                refresh_token()
                access_token = session['token']['access-token']

            if name not in hhapps:
                application = get_application(name)
                hhapps[name] = application.data
                app.config['HHCLIENT_APPS_ATTRIBUTES'] = hhapps
            print('apps', hhapps[name]['publicurl'])

            Client = get_client(name)

            client = Client(api_url=hhapps[name]['publicurl'],
                            access_token=access_token,
                            schemas=schemas.get(name, None))

            if name not in schemas:
                if 'HHCLIENT_APPS_SCHEMAS' not in app.config:
                    app.config['HHCLIENT_APPS_SCHEMAS'] = {}
                app.config['HHCLIENT_APPS_SCHEMAS'][name] = client.schemas

            return client
Example #45
    def init_hhclient():
        if g.get('hhclient', None):
            return

        name = None
        password = None
        access_token = None
        expiry_date = None

        user = current_user
        # auth = session.get('auth', None)
        # print(user.data)
        if user.is_authenticated:
            access_token = user.token.get('access-token', None)
            expiry_date = dateparse(user.token.get('expiry-date'))

        schemas = app.config.get('HHCLIENT_SCHEMAS', None)
        host = app.config.get('HHCLIENT_HOST')
        port = app.config.get('HHCLIENT_PORT')
        secure = app.config.get('HHCLIENT_SECURE')

        now = datetime.datetime.utcnow()
        if expiry_date and now > expiry_date:
            refresh_token()
            access_token = session['token']['access-token']

        Client = get_client('hhservice')
        g.hhclient = Client(name=name,
                            password=password,
                            host=host,
                            port=port,
                            secure_connection=secure,
                            access_token=access_token,
                            schemas=schemas)

        if not schemas:
            app.config['HHCLIENT_SCHEMAS'] = g.hhclient.schemas
Example #46
def save_fec_contribution(committee, contribution):
    try:
        contribution = Contribution.objects.create(
            committee=committee,
            filing_number=contribution['FECTransID'],
            name=contribution['Contrib'],
            date=dateparse(contribution['Date']),
            employer=contribution['Orgname'],
            occupation=contribution['Occ_EF'],
            street1=contribution['Street'],
            street2='',
            city=contribution['City'],
            state=contribution['State'],
            zipcode=contribution['Zip'],
            amount=contribution['Amount'],
            aggregate=0,
            memo='',
            url='',
            data_row='')
    except IntegrityError:
        print 'Skipping'
        return None

    return contribution
Example #47
    def modifyData(self, v):
        '''Modify/Fix up data if necessary'''

        if isinstance(v, unicode):

            # Convert iso8601 dates in unicode to datetime:
            #    eq. 2013-12-15T13:31:21.000+0000

            if len(v) >= 17:
                # Quick parse:
                if v[4] + v[7] + v[10] + v[13] + v[16] == '--T::':
                    return dateparse(v).replace(tzinfo=None)

            # Convert unicode dates to datetime
            #    eq. 2013-12-15

            elif len(v) == 10:
                # Quick parse:
                if v[4] + v[7] == '--' and (v[0:4] + v[5:7] +
                                            v[8:10]).isdigit():
                    return str2datetime(v)

        # Not change
        return v
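
modifyData() above uses cheap positional character checks ('--T::' and '--') to decide whether a unicode value looks like an ISO 8601 timestamp or a plain date before handing it to dateparse or str2datetime. The timestamp branch in isolation, as a hedged sketch (str2datetime is project-specific and not reproduced here):

# Sketch: the positional pre-filter from above, applied to one value.
from dateutil.parser import parse as dateparse

v = '2013-12-15T13:31:21.000+0000'
if len(v) >= 17 and v[4] + v[7] + v[10] + v[13] + v[16] == '--T::':
    print(dateparse(v).replace(tzinfo=None))    # 2013-12-15 13:31:21
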
Example #48
    def get_metadata(self):
        """Auto-fill all relevant metadata used in run_conversion."""
        session_path = self.data_interface_objects["GrosmarkLFP"].input_args["folder_path"]
        subject_path, session_id = os.path.split(session_path)
        if "_" in session_id:
            subject_id, date_text = session_id.split("_")
        session_start = dateparse(date_text[-4:] + date_text[:-4])

        xml_filepath = os.path.join(session_path, "{}.xml".format(session_id))
        root = et.parse(xml_filepath).getroot()

        n_total_channels = int(root.find("acquisitionSystem").find("nChannels").text)
        shank_channels = [
            [int(channel.text) for channel in group.find("channels")]
            for group in root.find("spikeDetection").find("channelGroups").findall("group")
        ]
        all_shank_channels = np.concatenate(shank_channels)
        all_shank_channels.sort()
        spikes_nsamples = int(root.find("neuroscope").find("spikes").find("nSamples").text)
        lfp_sampling_rate = float(root.find("fieldPotentials").find("lfpSamplingRate").text)

        shank_electrode_number = [x for channels in shank_channels for x, _ in enumerate(channels)]
        shank_group_name = ["shank{}".format(n + 1) for n, channels in enumerate(shank_channels) for _ in channels]

        cell_filepath = os.path.join(session_path, "{}.spikes.cellinfo.mat".format(session_id))
        if os.path.isfile(cell_filepath):
            cell_info = loadmat(cell_filepath)["spikes"]

        celltype_mapping = {"pE": "excitatory", "pI": "inhibitory"}
        celltype_filepath = os.path.join(session_path, "{}.CellClass.cellinfo.mat".format(session_id))
        if os.path.isfile(celltype_filepath):
            celltype_info = [
                str(celltype_mapping[x[0]]) for x in loadmat(celltype_filepath)["CellClass"]["label"][0][0][0]
            ]

        device_name = "implant"
        metadata = dict(
            NWBFile=dict(
                identifier=session_id,
                session_start_time=session_start.astimezone(),
                file_create_date=datetime.now().astimezone(),
                session_id=session_id,
                institution="NYU",
                lab="Buzsaki",
            ),
            Subject=dict(
                subject_id=subject_id,
            ),
            BuzsakiNoRecording=dict(
                Ecephys=dict(
                    subset_channels=all_shank_channels,
                    Device=[dict(name=device_name)],
                    ElectrodeGroup=[
                        dict(
                            name=f"shank{n+1}",
                            description=f"shank{n+1} electrodes",
                            device_name=device_name,
                        )
                        for n, _ in enumerate(shank_channels)
                    ],
                    Electrodes=[
                        dict(
                            name="shank_electrode_number",
                            description="0-indexed channel within a shank.",
                            data=shank_electrode_number,
                        ),
                        dict(
                            name="group",
                            description="A reference to the ElectrodeGroup this electrode is a part of.",
                            data=shank_group_name,
                        ),
                        dict(
                            name="group_name",
                            description="The name of the ElectrodeGroup this electrode is a part of.",
                            data=shank_group_name,
                        ),
                    ],
                )
            ),
            NeuroscopeSorting=dict(
                UnitProperties=[
                    dict(
                        name="cell_type",
                        description="name of cell type",
                        data=celltype_info,
                    ),
                    dict(
                        name="global_id",
                        description="global id for cell for entire experiment",
                        data=[int(x) for x in cell_info["UID"][0][0][0]],
                    ),
                    dict(
                        name="shank_id",
                        description="0-indexed id of cluster from shank",
                        # - 2 b/c the 0 and 1 IDs from each shank have been removed
                        data=[int(x - 2) for x in cell_info["cluID"][0][0][0]],
                    ),
                    dict(
                        name="electrode_group",
                        description="the electrode group that each spike unit came from",
                        data=["shank" + str(x) for x in cell_info["shankID"][0][0][0]],
                    ),
                    dict(
                        name="region",
                        description="brain region where unit was detected",
                        data=[str(x[0]) for x in cell_info["region"][0][0][0]],
                    ),
                ]
            ),
            GrosmarkLFP=dict(),
            GrosmarkBehavior=dict(),
        )

        return metadata
Exemple #49
0
    def __init__(self, stringdate=None):
        if stringdate is None:
            self.date = datetime.utcnow()
        else:
            self.date = dateparse(stringdate)
            # datetime.replace() returns a new object, so assign the result back
            self.date = self.date.replace(tzinfo=pytz.UTC)
Exemple #50
0
    def handle(self, *args, **options):
        if not options.get('file', False):
            print('--file is required')
            exit(1)

        wb = load_workbook(options['file'])
        ws = wb.get_active_sheet()
        rowcount = 0
        savedcount = 0

        date_signed = None
        if options.get('date_signed', False):
            try:
                date_signed = dateparse(options.get('date_signed'))
            except ValueError:
                print('Invalid date for date_signed!')
                exit(1)

        for row in ws.rows:
            dt_signed = None
            if row[12].value:
                if isinstance(row[12].value, date) or isinstance(
                        row[12].value, datetime):
                    dt_signed = row[12].value
                elif isinstance(row[12].value, str):
                    try:
                        dt_signed = dateparse(row[12].value.strip())
                    except ValueError:
                        print(
                            'INVALID VALUE FOR DATE/TIME SIGNED! Skipping ...')
                        continue

            # only filter when both the cutoff and the row's signed date are known
            if date_signed and dt_signed:
                if dt_signed.date() < date_signed.date():
                    print('dt_signed older than date_signed. Skipping ...')
                    continue

            email = None
            if row[4].value and isinstance(row[4].value, str):
                try:
                    email = row[4].value.strip().lower()
                    validate_email(email)
                except ValidationError:
                    print('INVALID EMAIL! {} Skipping ...'.format(
                        row[4].value))
                    continue

            first_name = None
            if row[2].value and isinstance(row[2].value, str):
                first_name = row[2].value.strip().title()

            last_name = None
            if row[3].value and isinstance(row[3].value, str):
                last_name = row[3].value.strip().title()

            zip = None
            non_conforming_zip = None
            if row[8].value:
                zip = str(row[8].value).strip()

                # if it's only 4 characters and all numbers, throw in a leading 0
                if len(zip) == 4:
                    try:
                        int(zip)
                        zip = '0{}'.format(zip)
                    except ValueError:
                        pass

                # if it's 9 characters and all numbers, throw in a -
                if len(zip) == 9:
                    try:
                        int(zip)
                        zip = '{}-{}'.format(zip[:5], zip[5:])
                    except ValueError:
                        pass

                # if we get to here and still don't have a valid zip, we'll throw it in the non-conforming field
                if not re.match(ZIP_REGEX, zip):
                    non_conforming_zip = zip
                    zip = None

            city = None
            state = None
            if zip:
                try:
                    zipcode = ZipCode.objects.get(zip=zip[:5])
                    city = zipcode.city
                    state = zipcode.state
                except ZipCode.DoesNotExist:
                    print('COULD NOT DETERMINE STATE FROM ZIP {}!'.format(zip))

            try:
                PetitionSigner.objects.get(email__iexact=email)
                print(
                    'PETITION SIGNER FOR EMAIL {} EXISTS! Skipping ...'.format(
                        email))
                continue
            except PetitionSigner.DoesNotExist:
                print('NO PETITION SIGNER FOR EMAIL {}! Creating ...'.format(
                    email))
                PetitionSigner.objects.create(
                    first_name=first_name,
                    last_name=last_name,
                    email=email,
                    city=city,
                    state=state,
                    zip=zip,
                    non_conforming_zip=non_conforming_zip,
                    dt_signed=dt_signed)

                savedcount += 1

            # if the petition signer is also a volunteer, update the signed date
            try:
                volunteer = Volunteer.objects.get(email__iexact=email)
                print(
                    'Volunteer found for email {}! Updating petition signed date ...'
                    .format(email))
                volunteer.dt_signed_petition = dt_signed
                volunteer.save()
            except Volunteer.DoesNotExist:
                pass

            rowcount += 1

        print(
            '*** PROCESSING COMPLETE. {} rows processed, {} petition signers created.'
            .format(rowcount, savedcount))
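A minimal standalone sketch of the date filter used above (only dateutil is assumed; variable names are borrowed from the snippet): rows whose signed date falls before the --date_signed cutoff are skipped.

from dateutil.parser import parse as dateparse

date_signed = dateparse('2020-06-01')           # e.g. the value passed via --date_signed
for raw in ('2020-05-30 09:15', '2020-06-02T08:00:00'):
    dt_signed = dateparse(raw)                  # value read from a spreadsheet cell
    if dt_signed.date() < date_signed.date():
        print('skipping', raw)
    else:
        print('keeping', raw)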
Exemple #51
0
    def get_collection_items(self, headers, args, dataset, pathinfo=None):
        """
        Queries feature collection

        :param headers: dict of HTTP headers
        :param args: dict of HTTP request parameters
        :param dataset: dataset name
        :param pathinfo: path location

        :returns: tuple of headers, status code, content
        """

        headers_ = HEADERS.copy()

        properties = []
        reserved_fieldnames = [
            'bbox', 'f', 'limit', 'startindex', 'resulttype', 'datetime',
            'sortby'
        ]
        formats = FORMATS
        formats.extend(f.lower() for f in PLUGINS['formatter'].keys())

        if dataset not in self.config['datasets'].keys():
            exception = {
                'code': 'InvalidParameterValue',
                'description': 'Invalid feature collection'
            }
            LOGGER.error(exception)
            return headers_, 400, json.dumps(exception, default=json_serial)

        format_ = check_format(args, headers)

        if format_ is not None and format_ not in formats:
            exception = {
                'code': 'InvalidParameterValue',
                'description': 'Invalid format'
            }
            LOGGER.error(exception)
            return headers_, 400, json.dumps(exception)

        LOGGER.debug('Processing query parameters')

        LOGGER.debug('Processing startindex parameter')
        try:
            startindex = int(args.get('startindex'))
            if startindex < 0:
                exception = {
                    'code': 'InvalidParameterValue',
                    'description': 'startindex value should be positive or zero'
                }
                LOGGER.error(exception)
                return headers_, 400, json.dumps(exception)
        except TypeError as err:
            LOGGER.warning(err)
            startindex = 0
        except ValueError as err:
            LOGGER.warning(err)
            exception = {
                'code': 'InvalidParameterValue',
                'description': 'startindex value should be an integer'
            }
            LOGGER.error(exception)
            return headers_, 400, json.dumps(exception)

        LOGGER.debug('Processing limit parameter')
        try:
            limit = int(args.get('limit'))
            # TODO: We should do more validation, against the min and max
            # allowed by the server configuration
            if limit <= 0:
                exception = {
                    'code': 'InvalidParameterValue',
                    'description': 'limit value should be strictly positive'
                }
                LOGGER.error(exception)
                return headers_, 400, json.dumps(exception)
        except TypeError as err:
            LOGGER.warning(err)
            limit = int(self.config['server']['limit'])
        except ValueError as err:
            LOGGER.warning(err)
            exception = {
                'code': 'InvalidParameterValue',
                'description': 'limit value should be an integer'
            }
            LOGGER.error(exception)
            return headers_, 400, json.dumps(exception)

        resulttype = args.get('resulttype') or 'results'

        LOGGER.debug('Processing bbox parameter')
        try:
            bbox = args.get('bbox').split(',')
            if len(bbox) != 4:
                exception = {
                    'code': 'InvalidParameterValue',
                    'description': 'bbox values should be minx,miny,maxx,maxy'
                }
                LOGGER.error(exception)
                return headers_, 400, json.dumps(exception)
        except AttributeError:
            bbox = []
        try:
            bbox = [float(c) for c in bbox]
        except ValueError:
            exception = {
                'code': 'InvalidParameterValue',
                'description': 'bbox values must be numbers'
            }
            LOGGER.error(exception)
            return headers_, 400, json.dumps(exception)

        LOGGER.debug('Processing datetime parameter')
        # TODO: pass datetime to query as a `datetime` object
        # we would need to ensure partial dates work accordingly
        # as well as setting '..' values to `None` so that underlying
        # providers can just assume a `datetime.datetime` object
        #
        # NOTE: needs testing when passing partials from API to backend
        datetime_ = args.get('datetime')
        datetime_invalid = False

        if (datetime_ is not None
                and 'temporal' in self.config['datasets'][dataset]['extents']):
            te = self.config['datasets'][dataset]['extents']['temporal']

            if te['begin'].tzinfo is None:
                te['begin'] = te['begin'].replace(tzinfo=pytz.UTC)
            if te['end'].tzinfo is None:
                te['end'] = te['end'].replace(tzinfo=pytz.UTC)

            if '/' in datetime_:  # envelope
                LOGGER.debug('detected time range')
                LOGGER.debug('Validating time windows')
                datetime_begin, datetime_end = datetime_.split('/')
                if datetime_begin != '..':
                    datetime_begin = dateparse(datetime_begin)
                    if datetime_begin.tzinfo is None:
                        datetime_begin = datetime_begin.replace(
                            tzinfo=pytz.UTC)

                if datetime_end != '..':
                    datetime_end = dateparse(datetime_end)
                    if datetime_end.tzinfo is None:
                        datetime_end = datetime_end.replace(tzinfo=pytz.UTC)

                if te['begin'] is not None and datetime_begin != '..':
                    if datetime_begin < te['begin']:
                        datetime_invalid = True

                if te['end'] is not None and datetime_end != '..':
                    if datetime_end > te['end']:
                        datetime_invalid = True

            else:  # time instant
                datetime__ = dateparse(datetime_)
                if datetime__ != '..':
                    if datetime__.tzinfo is None:
                        datetime__ = datetime__.replace(tzinfo=pytz.UTC)
                LOGGER.debug('detected time instant')
                if te['begin'] is not None and datetime__ != '..':
                    if datetime__ < te['begin']:
                        datetime_invalid = True
                if te['end'] is not None and datetime__ != '..':
                    if datetime__ > te['end']:
                        datetime_invalid = True

        if datetime_invalid:
            exception = {
                'code': 'InvalidParameterValue',
                'description': 'datetime parameter out of range'
            }
            LOGGER.error(exception)
            return headers_, 400, json.dumps(exception)

        LOGGER.debug('Loading provider')
        try:
            p = load_plugin('provider',
                            self.config['datasets'][dataset]['provider'])
        except ProviderConnectionError:
            exception = {
                'code': 'NoApplicableCode',
                'description': 'connection error (check logs)'
            }
            LOGGER.error(exception)
            return headers_, 500, json.dumps(exception)
        except ProviderQueryError:
            exception = {
                'code': 'NoApplicableCode',
                'description': 'query error (check logs)'
            }
            LOGGER.error(exception)
            return headers_, 500, json.dumps(exception)

        LOGGER.debug('processing property parameters')
        for k, v in args.items():
            if k not in reserved_fieldnames and k not in p.fields.keys():
                exception = {
                    'code': 'InvalidParameterValue',
                    'description': 'unknown query parameter'
                }
                LOGGER.error(exception)
                return headers_, 400, json.dumps(exception)
            elif k not in reserved_fieldnames and k in p.fields.keys():
                LOGGER.debug('Add property filter {}={}'.format(k, v))
                properties.append((k, v))

        LOGGER.debug('processing sort parameter')
        val = args.get('sortby')

        if val is not None:
            sortby = []
            sorts = val.split(',')
            for s in sorts:
                if ':' in s:
                    prop, order = s.split(':')
                    if order not in ['A', 'D']:
                        exception = {
                            'code': 'InvalidParameterValue',
                            'description': 'sort order should be A or D'
                        }
                        LOGGER.error(exception)
                        return headers_, 400, json.dumps(exception)
                    sortby.append({'property': prop, 'order': order})
                else:
                    sortby.append({'property': s, 'order': 'A'})
            for s in sortby:
                if s['property'] not in p.fields.keys():
                    exception = {
                        'code': 'InvalidParameterValue',
                        'description': 'bad sort property'
                    }
                    LOGGER.error(exception)
                    return headers_, 400, json.dumps(exception)
        else:
            sortby = []

        LOGGER.debug('Querying provider')
        LOGGER.debug('startindex: {}'.format(startindex))
        LOGGER.debug('limit: {}'.format(limit))
        LOGGER.debug('resulttype: {}'.format(resulttype))
        LOGGER.debug('sortby: {}'.format(sortby))

        try:
            content = p.query(startindex=startindex,
                              limit=limit,
                              resulttype=resulttype,
                              bbox=bbox,
                              datetime=datetime_,
                              properties=properties,
                              sortby=sortby)
        except ProviderConnectionError as err:
            exception = {
                'code': 'NoApplicableCode',
                'description': 'connection error (check logs)'
            }
            LOGGER.error(err)
            return headers_, 500, json.dumps(exception)
        except ProviderQueryError as err:
            exception = {
                'code': 'NoApplicableCode',
                'description': 'query error (check logs)'
            }
            LOGGER.error(err)
            return headers_, 500, json.dumps(exception)
        except ProviderGenericError as err:
            exception = {
                'code': 'NoApplicableCode',
                'description': 'generic error (check logs)'
            }
            LOGGER.error(err)
            return headers_, 500, json.dumps(exception)

        serialized_query_params = ''
        for k, v in args.items():
            if k not in ('f', 'startindex'):
                serialized_query_params += '&'
                serialized_query_params += urllib.parse.quote(k, safe='')
                serialized_query_params += '='
                serialized_query_params += urllib.parse.quote(str(v), safe=',')

        content['links'] = [{
            'type':
            'application/geo+json',
            'rel':
            'self' if not format_ or format_ == 'json' else 'alternate',
            'title':
            'This document as GeoJSON',
            'href':
            '{}/collections/{}/items?f=json{}'.format(
                self.config['server']['url'], dataset, serialized_query_params)
        }, {
            'rel':
            'self' if format_ == 'jsonld' else 'alternate',
            'type':
            'application/ld+json',
            'title':
            'This document as RDF (JSON-LD)',
            'href':
            '{}/collections/{}/items?f=jsonld{}'.format(
                self.config['server']['url'], dataset, serialized_query_params)
        }, {
            'type':
            'text/html',
            'rel':
            'self' if format_ == 'html' else 'alternate',
            'title':
            'This document as HTML',
            'href':
            '{}/collections/{}/items?f=html{}'.format(
                self.config['server']['url'], dataset, serialized_query_params)
        }]

        if startindex > 0:
            prev = max(0, startindex - limit)
            content['links'].append({
                'type':
                'application/geo+json',
                'rel':
                'prev',
                'title':
                'items (prev)',
                'href':
                '{}/collections/{}/items?startindex={}{}'.format(
                    self.config['server']['url'], dataset, prev,
                    serialized_query_params)
            })

        if len(content['features']) == limit:
            next_ = startindex + limit
            content['links'].append({
                'type':
                'application/geo+json',
                'rel':
                'next',
                'title':
                'items (next)',
                'href':
                '{}/collections/{}/items?startindex={}{}'.format(
                    self.config['server']['url'], dataset, next_,
                    serialized_query_params)
            })

        content['links'].append({
            'type':
            'application/json',
            'title':
            self.config['datasets'][dataset]['title'],
            'rel':
            'collection',
            'href':
            '{}/collections/{}'.format(self.config['server']['url'], dataset)
        })

        content['timeStamp'] = datetime.utcnow().strftime(
            '%Y-%m-%dT%H:%M:%S.%fZ')

        if format_ == 'html':  # render
            headers_['Content-Type'] = 'text/html'

            # For constructing proper URIs to items
            if pathinfo:
                path_info = '/'.join([
                    self.config['server']['url'].rstrip('/'),
                    pathinfo.strip('/')
                ])
            else:
                path_info = '/'.join([
                    self.config['server']['url'].rstrip('/'),
                    headers.environ['PATH_INFO'].strip('/')
                ])

            content['items_path'] = path_info
            content['dataset_path'] = '/'.join(path_info.split('/')[:-1])
            content['collections_path'] = '/'.join(path_info.split('/')[:-2])
            content['startindex'] = startindex

            content = render_j2_template(self.config, 'items.html', content)
            return headers_, 200, content
        elif format_ == 'csv':  # render
            formatter = load_plugin('formatter', {'name': 'CSV', 'geom': True})

            content = formatter.write(
                data=content,
                options={
                    'provider_def':
                    self.config['datasets'][dataset]['provider']
                })

            headers_['Content-Type'] = '{}; charset={}'.format(
                formatter.mimetype, self.config['server']['encoding'])

            cd = 'attachment; filename="{}.csv"'.format(dataset)
            headers_['Content-Disposition'] = cd

            return headers_, 200, content
        elif format_ == 'jsonld':
            headers_['Content-Type'] = 'application/ld+json'
            content = geojson2geojsonld(self.config, content, dataset)
            return headers_, 200, content

        return headers_, 200, json.dumps(content, default=json_serial)
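The datetime handling is the dateparse-heavy part of this example. Below is a condensed standalone sketch (only dateutil and pytz are assumed; the function name is invented) of how an OGC API style datetime parameter is split into UTC-aware bounds, with '..' marking an open end.

import pytz
from dateutil.parser import parse as dateparse

def parse_datetime_param(datetime_):
    """Return (begin, end) as UTC-aware datetimes; None marks an open bound."""
    if '/' in datetime_:                      # envelope, e.g. 2010-04-22/..
        begin, end = datetime_.split('/')
    else:                                     # time instant
        begin = end = datetime_

    def to_utc(value):
        if value == '..':
            return None
        parsed = dateparse(value)
        return parsed if parsed.tzinfo else parsed.replace(tzinfo=pytz.UTC)

    return to_utc(begin), to_utc(end)

print(parse_datetime_param('2010-04-22T00:00:00Z/..'))
print(parse_datetime_param('2018-03-12'))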
Exemple #52
0
    def handle_pull_request(self, payload, event):
        changes = []
        number = payload['number']
        refname = 'refs/pull/{}/{}'.format(number, self.pullrequest_ref)
        basename = payload['pull_request']['base']['ref']
        commits = payload['pull_request']['commits']
        title = payload['pull_request']['title']
        comments = payload['pull_request']['body']
        repo_full_name = payload['repository']['full_name']
        head_sha = payload['pull_request']['head']['sha']

        log.msg('Processing GitHub PR #{}'.format(number),
                logLevel=logging.DEBUG)

        head_msg = yield self._get_commit_msg(repo_full_name, head_sha)
        if self._has_skip(head_msg):
            log.msg("GitHub PR #{}, Ignoring: "
                    "head commit message contains skip pattern".format(number))
            return ([], 'git')

        action = payload.get('action')
        if action not in ('opened', 'reopened', 'synchronize'):
            log.msg("GitHub PR #{} {}, ignoring".format(number, action))
            return (changes, 'git')

        properties = self.extractProperties(payload['pull_request'])
        properties.update({'event': event})
        properties.update({'basename': basename})
        change = {
            'revision':
            payload['pull_request']['head']['sha'],
            'when_timestamp':
            dateparse(payload['pull_request']['created_at']),
            'branch':
            refname,
            'revlink':
            payload['pull_request']['_links']['html']['href'],
            'repository':
            payload['repository']['html_url'],
            'project':
            payload['pull_request']['base']['repo']['full_name'],
            'category':
            'pull',
            # TODO: Get author name based on login id using txgithub module
            'author':
            payload['sender']['login'],
            'comments':
            'GitHub Pull Request #{0} ({1} commit{2})\n{3}\n{4}'.format(
                number, commits, 's' if commits != 1 else '', title, comments),
            'properties':
            properties,
        }

        if callable(self._codebase):
            change['codebase'] = self._codebase(payload)
        elif self._codebase is not None:
            change['codebase'] = self._codebase

        changes.append(change)

        log.msg("Received {} changes from GitHub PR #{}".format(
            len(changes), number))
        return (changes, 'git')
Exemple #53
0
    def _process_change(self, payload, user, repo, repo_url, project, event,
                        properties):
        """
        Consumes the JSON as a python object and actually starts the build.

        :arguments:
            payload
                Python Object that represents the JSON sent by GitHub Service
                Hook.
        """
        changes = []
        refname = payload['ref']

        # We only care about regular heads or tags
        match = re.match(r"^refs/(heads|tags)/(.+)$", refname)
        if not match:
            log.msg("Ignoring refname `{}': Not a branch".format(refname))
            return changes
        category = None  # None is the legacy category for when hook only supported push
        if match.group(1) == "tags":
            category = "tag"

        branch = match.group(2)
        if payload.get('deleted'):
            log.msg("Branch `{}' deleted, ignoring".format(branch))
            return changes

        # check skip pattern in commit message. e.g.: [ci skip] and [skip ci]
        head_msg = payload['head_commit'].get('message', '')
        if self._has_skip(head_msg):
            return changes
        commits = payload['commits']
        if payload.get('created'):
            commits = [payload['head_commit']]
        for commit in commits:
            files = []
            for kind in ('added', 'modified', 'removed'):
                files.extend(commit.get(kind, []))

            when_timestamp = dateparse(commit['timestamp'])

            log.msg("New revision: {}".format(commit['id'][:8]))

            change = {
                'author':
                '{} <{}>'.format(commit['author']['name'],
                                 commit['author']['email']),
                'committer':
                '{} <{}>'.format(commit['committer']['name'],
                                 commit['committer']['email']),
                'files':
                files,
                'comments':
                commit['message'],
                'revision':
                commit['id'],
                'when_timestamp':
                when_timestamp,
                'branch':
                branch,
                'revlink':
                commit['url'],
                'repository':
                repo_url,
                'project':
                project,
                'properties': {
                    'github_distinct': commit.get('distinct', True),
                    'event': event,
                },
                'category':
                category
            }
            # Update with any white-listed github event properties
            change['properties'].update(properties)

            if callable(self._codebase):
                change['codebase'] = self._codebase(payload)
            elif self._codebase is not None:
                change['codebase'] = self._codebase

            changes.append(change)

        return changes
Exemple #54
0
def get_markers(db, client_name, clip_id, congress, chamber):
    api_url = API_PREFIX + client_name + '?type=marker&size=100000'
    data = '{"filter": { "term": { "video_id": %s}}, "sort": [{"offset":{"order":"asc"}}]}' % clip_id
    markers = query_api(db, api_url, data)
    clips = []
    bill_ids = []
    legislators = []
    legislator_ids = []
    roll_ids = []

    if markers:
        for m in markers:
            m_new = m['_source']
            c = {
                'offset': m_new['offset'],
                'events': [
                    htmlentitydecode(m_new['name']).strip(),
                ],
                'time': m_new['datetime']
            }
            if m != markers[-1]:  #if it's not the last one
                c['duration'] = markers[markers.index(
                    m) + 1]['_source']['offset'] - m_new['offset']

            year = dateparse(m_new['datetime']).year

            legis, bio_ids = python_utils.extract_legislators(
                c['events'][0], chamber, db)
            b = python_utils.extract_bills(c['events'][0], congress)
            r = python_utils.extract_rolls(c['events'][0], chamber, year)

            if legis:
                c['legislator_names'] = legis
                for l in legis:
                    if l not in legislators:
                        legislators.append(l)
            if bio_ids:
                c['legislator_ids'] = bio_ids
                for bi in bio_ids:
                    if bi not in legislator_ids:
                        legislator_ids.append(bi)

            if r:
                c['roll_ids'] = r
                for ro in r:
                    if ro not in roll_ids:
                        roll_ids.append(ro)

            if b:
                c['bill_ids'] = b
                for bill in b:
                    if bill not in bill_ids:
                        bill_ids.append(bill)

            clips.append(c)

        return (clips, bill_ids, legislators, legislator_ids, roll_ids)

    else:
        db.note('There are no markers for video id: %s' % clip_id)
        return (None, None, None, None, None)
Exemple #55
0
def get_videos(db, es, client_name, chamber, archive=False, captions=False):
    api_url = API_PREFIX + client_name + '?type=video'
    data = '{ "sort": [ {"datetime": {"order": "desc" }} ]  }'
    if archive:
        api_url += '&size=100000'
    else:
        api_url += '&size=2'
    videos = query_api(db, api_url, data)

    if not videos:
        db.warning("Granicus API appears to be down",
                   {'errors': PARSING_ERRORS})
        sys.exit()

    vcount = 0
    for vid in videos:

        v = vid['_source']

        legislative_day = dateparse(v['datetime'])

        video_id = chamber + '-' + str(
            int(timey.mktime(legislative_day.timetuple())))
        new_vid = db.get_or_initialize('videos', {'video_id': video_id})

        #initialize arrays and dicts so we don't have to worry about it later
        if not new_vid.has_key('clip_urls'): new_vid['clip_urls'] = {}
        if not new_vid.has_key('bill_ids'): new_vid['bill_ids'] = []
        if not new_vid.has_key('legislator_ids'):
            new_vid['legislator_ids'] = []
        if not new_vid.has_key('legislator_names'):
            new_vid['legislator_names'] = []

        if not new_vid.has_key('created_at'):
            new_vid['created_at'] = datetime.now()
        new_vid['updated_at'] = datetime.now()
        #video id, clips array, legislators array, bills array

        new_vid = try_key(v, 'id', 'clip_id', new_vid)
        new_vid = try_key(v, 'duration', 'duration', new_vid)
        new_vid = try_key(v, 'datetime', 'published_at', new_vid)

        # normalize timestamp format to RFC3339 in UTC
        new_vid['published_at'] = rfc3339(dateparse(new_vid['published_at']))

        new_vid['clip_urls'] = try_key(v, 'http', 'mp4', new_vid['clip_urls'])
        new_vid['clip_urls'] = try_key(v, 'hls', 'hls', new_vid['clip_urls'])
        new_vid['clip_urls'] = try_key(v, 'rtmp', 'rtmp', new_vid['clip_urls'])

        new_vid['legislative_day'] = legislative_day.strftime('%Y-%m-%d')
        new_vid['chamber'] = chamber
        new_vid['congress'] = python_utils.current_congress(
            legislative_day.year)

        if chamber == 'house':
            new_vid['clips'], new_vid['bill_ids'], new_vid[
                'legislator_names'], new_vid['legislator_ids'], new_vid[
                    'roll_ids'] = get_markers(db, client_name,
                                              new_vid['clip_id'],
                                              new_vid['congress'], chamber)
        elif chamber == 'senate':
            new_vid['clips'], new_vid['bill_ids'], new_vid[
                'legislator_names'], new_vid['legislator_ids'], new_vid[
                    'roll_ids'] = get_clips_for_senate(
                        db, new_vid['clip_id'], new_vid['congress'],
                        new_vid['duration'],
                        dateparse(new_vid['published_at']).year)

        if new_vid['clips'] is None:
            print "Couldn't fetch information for video, skipping."
            continue

        #make sure the last clip has a duration
        if new_vid['clips'] and len(new_vid['clips']) > 0:
            new_vid['clips'][-1]['duration'] = new_vid['duration'] - new_vid[
                'clips'][-1]['offset']

        if captions:
            new_vid['captions'], new_vid['caption_srt_file'] = get_captions(
                client_name, new_vid['clip_id'])

        db['videos'].save(new_vid)
        vcount += 1

        #index clip objects in elastic search

        if captions and new_vid.has_key(
                'clips') and new_vid['clips'] is not None and len(
                    new_vid['clips']) > 0:
            for c in new_vid['clips']:
                clip = {
                    'id':
                    "%s-%s" % (new_vid['video_id'], new_vid['clips'].index(c)),
                    'video_id':
                    new_vid['video_id'],
                    'video_clip_id':
                    new_vid['clip_id'],
                    'offset':
                    c['offset'],
                    'duration':
                    c['duration'],
                    'legislative_day':
                    new_vid['legislative_day'],
                    'published_at':
                    new_vid['published_at'],
                    'clip_urls':
                    new_vid['clip_urls']
                }
                clip = try_key(c, 'legislator_names', 'legislator_names', clip)
                clip = try_key(c, 'roll_ids', 'roll_ids', clip)
                clip = try_key(c, 'events', 'events', clip)
                clip = try_key(c, 'bill_ids', 'bill_ids', clip)
                clip = try_key(c, 'legislator_ids', 'legislator_ids', clip)

                if new_vid.has_key('caption_srt_file'):
                    clip['srt_link'] = new_vid['caption_srt_file']

                if new_vid.has_key('captions'):
                    clip['captions'] = get_clip_captions(
                        new_vid, c, c == new_vid['clips']
                        [0])  #pass a boolean if this is the first clip

                resp = es.save(clip, 'clips', clip['id'])
        print "Successfully processed %s" % new_vid['clip_id']

    es.connection.indices.refresh()

    db.success("Updated or created %s legislative days for %s video" %
               (client_name, vcount))
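The "normalize timestamp format to RFC3339 in UTC" step above relies on an rfc3339() helper that is not shown here; a rough equivalent using only dateutil and pytz (sample timestamp invented) looks like this.

import pytz
from dateutil.parser import parse as dateparse

published_at = '2014-12-09T14:30:00-05:00'      # hypothetical Granicus-style timestamp
utc = dateparse(published_at).astimezone(pytz.UTC)
print(utc.strftime('%Y-%m-%dT%H:%M:%SZ'))       # 2014-12-09T19:30:00Z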
Exemple #56
0
def gen_new_date_str(datum, start_time_column, end_time_column):
    new_date = dateparse(datum[start_time_column])
    # print(new_date, datum[start_time_column], datum[end_time_column])
    return utils.date_to_str_1(new_date)
Exemple #57
0
def _asPythonDate(datestr):
    return dateparse(datestr)
Exemple #58
0
async def detect_video(video_file, date, name="Today", notes=""):

    cpus = multiprocessing.cpu_count()

    experiment_uuid = uuid4()
    experiment_day = dateparse(date)
    experiment_dir = os.path.join(config.experiment_dir, str(experiment_uuid))
    experiment = (experiment_uuid, experiment_day, name, "detection", notes)

    try:

        print("Creating data directory", experiment_dir)
        os.mkdir(experiment_dir)

        scaleby = 1
        w, h = int(2336 / scaleby), int(1729 / scaleby)

        # Reads the source video, outputs frames
        print("Launching Video Reader")
        video_reader = FFmpeg(
            video_file,
            "",
            (h, w, 1),
            "-ss 00:00:02.00 -t 00:00:00.50 -vf scale={}:{}".format(w, h),
            [],
            False,
            FrameData,
        )

        print("Launching Database processor")
        db_proc = DB_Processor(experiment_uuid, experiment_day, name, notes)

        print("Launching Entry processor")
        entry_proc = Entry(experiment_uuid)

        print("Launching Magic pixel processor")
        magic_proc = MagicPixel()

        print("Launching Rescale processor")
        rescale_proc = Rescaler()

        # Computes a background for a frame, outputs {"frame": frame, "bg": bg}
        print("Launching Background Modeler")
        bg_proc = BG(model="simpleMax", window_size=50, img_shape=(h, w, 1))

        # Takes a background and a frame, enhances frame to model foreground
        print("Launching Foreground Modeler")
        fg_proc = FG()

        # Takes a foreground frame, binarizes it
        print("Launching Binary Mask Processor")
        mask_proc = Binary("legacyLabeled")

        print("Launching Properties Processor")
        prop_proc = Properties()

        print("Launching Crop Processor")
        crop_proc = Crop_Processor()

        # A utility to view video pipeline output
        raw_player = RawPlayer()
        bg_player = BGPlayer()
        fg_player = FGPlayer()
        mask_player = MaskPlayer()
        crop_player = CropPlayer()
        meta_player = MetaPlayer()

        # A utility to clean up datagram resources
        cleaner = Cleaner()
        # Todo
        # print("Launching Crop Writer")

        # print("Launching Detection Video Writer")
        # print("Launching Particle Commmitter")
        # /todo

        EZ(
            video_reader,
            entry_proc,
            magic_proc,
            meta_player,
            rescale_proc,
            raw_player,
            bg_proc,
            fg_proc,
            mask_proc,
            cleaner,
        ).start().join()

    except Exception as e:

        print("Uh oh. Something went wrong")

        traceback.print_exc()
        # wq.push(None)

        if os.path.exists(experiment_dir):
            print("Removing files from", experiment_dir)
            shutil.rmtree(experiment_dir)

    else:
        pass
        # dbwriter.commit()
        # wq.push(None)

    finally:

        print("Fin.")

    return experiment_uuid
Exemple #59
0
def datestr_to_datetime(value):
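    # The value is assumed to end in "(<date string>)"; the text inside the final
    # pair of parentheses is extracted and handed to dateparse.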
    rp = value.rfind('(')+1
    v = dateparse(value[rp:-1])
    print(value, "-->", v)  # spyder: test-skip
    return v
Exemple #60
0
        exit()

    lastFile = stateDir / "last.time"
    subsJSONFile = confDir / "subscriptions.json"

    # Overrule the time from which to download video if we've been asked to
    # keep videos since a certain number of days ago.
    if args.since is not None:
        sinceTimestamp = datetime.now() - relativedelta(days=int(args.since))
        ic("args.since is set", sinceTimestamp)
    elif not lastFile.exists():
        lastFile.write_text(str(time()))
        sinceTimestamp = datetime.now() - relativedelta(days=7)
        ic("lastFile does not exist", str(time()), sinceTimestamp)
    else:
        sinceTimestamp = dateparse(lastFile.read_text())
        ic("lastFile exists and is read", sinceTimestamp)

    tmpJSON = json.loads(subsJSONFile.read_text())
    ic(len(tmpJSON))
    ic(tmpJSON[0])

    baseURL = "https://www.youtube.com/feeds/videos.xml?channel_id="
    feedURLs = [
        baseURL + item["snippet"]["resourceId"]["channelId"]
        for item in tmpJSON
    ]
    ic(feedURLs[:10])

    # Nothing is purged by default
    if args.retain is not None: