Example #1
def main():
    parser = argparse.ArgumentParser(description="Retrieve petitions from We The People")
    parser.add_argument("-m", "--max", metavar="INTEGER", dest="max", type=int, default=None,
                        help="maximum pages of petitions to retrieve, default is 10, 100 per page")
    parser.add_argument("-s", "--start", metavar="INTEGER", dest="start", type=int, default=1,
                        help="starting page, 100 per page, default is 1")
    parser.add_argument("-q", "--query", metavar="STRING", dest="query", type=str, default="whitehouse+petition",
                        help="The query for searching twitter for petition links, default is 'whitehouse+petition'")
    args = parser.parse_args()

    if args.max is not None and args.max < 1:
        parser.error("How can I scrape less than one pages of twitter results? You make no sense! --max must be one or greater.")

    if args.start < 1:
        parser.error("--start must be one or greater.")

    if len(sys.argv) == 1:
        log('Running with default values. Use --help to see options.')

    search(args.query, args.start, args.max)

    #write log
    scrapelog["query"] = args.query
    scrapelog["end"] = datetime.now().strftime("%Y-%m-%d-%H:%M:%S")
    write(json.dumps(scrapelog, indent=2), "log-tw-" + scrapelog["begin"] + ".json", log_dir())
    log("Done. Found total %i petitions" % (len(scrapelog["signatures"])))
Example #2
def process(inFile, outFile, targets, algo):
    capture = cv2.VideoCapture(inFile)
    retval, image = capture.read()
    locations = []
    if retval:
        writer = cv2.VideoWriter(outFile + ".avi", 
            fps=25,
            fourcc=cv2.cv.CV_FOURCC(*"DIVX"),
            frameSize=image.shape[0:2][::-1])
        algorithms = []
        for x in targets:
            algo.start(image, x)
            algorithms.append(algo)
            utils.drawTarget(image, algo.target)
        writer.write(image)

    h, w = image.shape[:2]  # image.shape is (height, width[, channels])
    while retval:       
        retval, image = capture.read()
        target = np.array(algo.target) / np.array([w, h, w, h], dtype=np.float32)
        locations.append(target)
        if retval:
            for algo in algorithms:
                algo.next(image)
                color = (255, 0, 0)
                if algo.valid:
                    color = (0, 255, 0)
                utils.drawTarget(image, algo.target, color)
            writer.write(image)

    utils.write(outFile + ".txt", inFile, locations)
Example #3
    def do(self):
        log_msg = 'Tagging: "%s" as "%s"' % (self._revision, self._name)
        opts = {}
        if self._message:
            opts['F'] = utils.tmp_filename('tag-message')
            utils.write(opts['F'], self._message)

        if self._sign:
            log_msg += ', GPG-signed'
            opts['s'] = True
            status, output = self.model.git.tag(self._name,
                                                self._revision,
                                                with_status=True,
                                                with_stderr=True,
                                                **opts)
        else:
            opts['a'] = bool(self._message)
            status, output = self.model.git.tag(self._name,
                                                self._revision,
                                                with_status=True,
                                                with_stderr=True,
                                                **opts)
        if 'F' in opts:
            os.unlink(opts['F'])

        if output:
            log_msg += '\nOutput:\n%s' % output

        _notifier.broadcast(signals.log_cmd, status, log_msg)
        if status == 0:
            self.model.update_status()
Example #4
    def panel(self) :
        """
        Set up the side panel
        """

        self.disp.blit(IMG_SIDEPANEL_BG, (16*SQUARE, 16, 450, 496))
        self.btn_traps = []
        x = 16*SQUARE+65
        y = 20
        for trap in TRAPS :
            self.disp.blit(trap[0], (x,y))

            name = utils.write(trap[1], BLACK)
            price = utils.write(str(trap[3]), GRAY)
            lines = utils.formattext(trap[2], 35, BLACK, 15)

            self.disp.blit(name, (x+40,y+2))
            self.disp.blit(price, (x+275, y+2))

            offset = 20
            for l in lines :
                self.disp.blit(l, (x+40, y+offset))
                offset += 15

            self.btn_traps.append( (x, y, 330, offset+5) )

            y += 75

        self.disp.blit(IMG_LEVEL, RECT_LEVEL)
        self.disp.blit(IMG_MONEY, RECT_MONEY)
        self.disp.blit(IMG_LAB_QUIT, BTN_LAB_QUIT)
        self.disp.blit(IMG_LAB_START, BTN_LAB_START)

        self.updatepanel()
Example #5
def main():
    parser = argparse.ArgumentParser(description="Retrieve petitions from We The People")
    parser.add_argument(
        "-m",
        "--max",
        metavar="INTEGER",
        dest="max",
        type=int,
        default=None,
        help="maximum number of petitions to retrieve",
    )
    parser.add_argument(
        "-s",
        "--start",
        metavar="INTEGER",
        dest="start",
        type=int,
        default=1,
        help="starting page, 20 per page, default is 1",
    )
    args = parser.parse_args()

    if args.max is not None and args.max < 1:
        parser.error("How can I scrape less than one petition? You make no sense! --max must be one or greater.")

    if args.start < 1:
        parser.error("--start must be one or greater.")

    log("Found %i petitions" % (petitions(args.start, args.max)))

    # write log
    scrapelog["end"] = datetime.now().strftime("%Y-%m-%d-%H:%M:%S")
    write(json.dumps(scrapelog, indent=2), "log-wh-" + scrapelog["begin"] + ".json", log_dir())
Example #6
def split_signatures(pid, signatures=None):
    if not signatures:
        signatures = json.load(open(os.getcwd() + "/data/api/signatures/" + pid + ".json", "r"))
        
    for signature in signatures:
        signature['date'] = datetime.datetime.fromtimestamp(signature['created']).strftime("%y-%m-%d")
        signature['time'] = datetime.datetime.fromtimestamp(signature['created']).strftime("%H:%M:%S")
        #rm this needless field
        if signature['type'] == "signature":
            signature.pop("type")

    dates = sorted(set(map(lambda x:x['date'], signatures)))
    mostrecent = max([x['created'] for x in signatures])
    
    stats = {
        'total': len(signatures),
        'dates': [],
        'last': datetime.datetime.fromtimestamp(mostrecent).strftime("%y-%m-%d"),
        'laststamp': mostrecent
    }
    
    for day in dates:
        sigs = [x for x in signatures if x['date'] == day]
        stats['dates'].append((day, len(sigs)))
        write(json.dumps(sigs), "api/signatures/" + pid + "/" + day + ".json")
        
    write(json.dumps(stats, indent=2), "api/signatures/" + pid + "/stats.json")
Example #7
def combine():
    roster = defaultdict(list)    
    total = [defaultdict(int) for x in range(segments)]
    starts = {}
    data = json.load(open("data/times/all.json", "r"))
    duds = 0
    co = 0
    for runner in data:
        #print runner["bib number"], runner["5K"]
        #see if he/she showed up
        if "5K" not in runner or not runner["5K"][1]:
            duds += 1
            continue
        co += 1
        if co % 100 == 0:
            print co
        #placement will represent which marker he/she was closest to at each interval
        placement = ["0" for x in range(segments)]
        #stamps is the timestamps scraped from BAA.org
        stamps = [runner[x][1] for x in posts]
        marker = 0

        #fill in placement with most recent split time (intervals of 5K + half and finish)
        for c in range(segments):
            if c > 0:
                placement[c] = placement[c - 1]
            if marker < len(posts) and stamps[marker] and stamps[marker] < c * INTERVAL:
                placement[c] = posts[marker]
                marker += 1

        placement = [int(x.replace("K", "").replace("Finish Net", "42").replace("HALF", "21")) for x in placement]
        #print placement
        #print runner["bib number"]
        
        #calculate interpolations between kilometer marks

        #start at appropriate place for offset in starting point
        c = int(round(runner["0K"] / INTERVAL))
        while c < len(placement):
            if placement[c] == placement[-1] or c >= len(placement) - 2:
                break
            t = 1
            while c+t < len(placement) and placement[c + t] == placement[c]:
                t += 1
            #print c, t, placement[c+t], placement[c]
            step = float(placement[c+t]-placement[c]) / t
            for i in range(1, t):
                placement[c + i] = int(math.floor(placement[c + i] + i * step))
            c += t

        #print placement
        key = "_".join([str(x) for x in placement])
        roster[key].append(runner["bib number"])

        for c in range(segments):
            total[c][placement[c]] += 1
        

    write(json.dumps(roster, indent=2), "times/condensed.json")
    write(json.dumps(total, indent=2), "times/condensed_time.json")
Example #8
def update_bill_version_list(only_congress):
    bill_versions = {}

    # Which sitemap years should we look at?
    if not only_congress:
        sitemap_files = glob.glob(utils.cache_dir() + "/fdsys/sitemap/*/BILLS.xml")
    else:
        # If --congress=X is specified, only look at the relevant years.
        sitemap_files = [
            utils.cache_dir() + "/fdsys/sitemap/" + str(year) + "/BILLS.xml"
            for year in utils.get_congress_years(only_congress)
        ]
        sitemap_files = [f for f in sitemap_files if os.path.exists(f)]

    # For each year-by-year BILLS sitemap...
    for year_sitemap in sitemap_files:
        dom = etree.parse(year_sitemap).getroot()
        if dom.tag != "{http://www.sitemaps.org/schemas/sitemap/0.9}urlset":
            raise Exception("Mismatched sitemap type.")

        # Loop through each bill text version...
        for file_node in dom.xpath("x:url", namespaces=ns):
            # get URL and last modified date
            url = str(file_node.xpath("string(x:loc)", namespaces=ns))
            lastmod = str(file_node.xpath("string(x:lastmod)", namespaces=ns))

            # extract bill congress, type, number, and version from the URL
            m = re.match(r"http://www.gpo.gov/fdsys/pkg/BILLS-(\d+)([a-z]+)(\d+)(\D.*)/content-detail.html", url)
            if not m:
                raise Exception("Unmatched bill document URL: " + url)
            congress, bill_type, bill_number, version_code = m.groups()
            congress = int(congress)
            if bill_type not in utils.thomas_types:
                raise Exception("Invalid bill type: " + url)

            # If --congress=XXX is specified, only look at those bills.
            if only_congress and congress != only_congress:
                continue

            # Track the documents by congress, bill type, etc.
            bill_versions.setdefault(congress, {}).setdefault(bill_type, {}).setdefault(bill_number, {})[
                version_code
            ] = {"url": url, "lastmod": lastmod}

    # Output the bill version info. We can't do this until the end because we need to get
    # the complete list of versions for a bill before we write the file, and the versions
    # may be split across multiple sitemap files.

    for congress in bill_versions:
        for bill_type in bill_versions[congress]:
            for bill_number in bill_versions[congress][bill_type]:
                utils.write(
                    json.dumps(
                        bill_versions[congress][bill_type][bill_number],
                        sort_keys=True,
                        indent=2,
                        default=utils.format_datetime,
                    ),
                    output_for_bill(congress, bill_type, bill_number, "text-versions.json"),
                )
Example #9
def fetch_votes(session, rootdir):
    #get list of all votes from session from GovTrack
    votes = parse("http://www.govtrack.us/data/us/%s/rolls/" % session)

    for vote in [x for x in votes.xpath("//a/@href") if x[-4:] == ".xml"]:
        chamber = "house" if vote[0] == 'h' else "senate"
        url = "http://www.govtrack.us/data/us/%s/rolls/%s" % (session, vote)
        doc = download(url, session + "/" + vote)
        doc = doc.replace("&", "&amp;")
        try:
            markup = lxml.objectify.fromstring(doc)
        except Exception, e:
            print "Couldn't read", url
            print e
            continue
        data = {}
        data["rollcall"] = {}
        #walk through xml and collect key/value pairs
        for el in markup.getiterator():
            if el.attrib == {}:
                data[el.tag] = el.text
            elif el.tag == 'voter':
                data["rollcall"][el.attrib["id"]] = el.attrib["value"]
        print rootdir + "/data/json/%s/%s/%s.json" % (chamber, session, vote[:-4])
                
        write(json.dumps(data, indent=2), rootdir + "/data/json/%s/%s/%s.json" % (chamber, session, vote[:-4]))
Example #10
def run(options):
  # Load the committee metadata from the congress-legislators repository and make a
  # mapping from thomas_id and house_id to the committee dict. For each committee,
  # replace the subcommittees list with a dict from thomas_id to the subcommittee.
  utils.require_congress_legislators_repo()
  committees = { }
  for c in utils.yaml_load("congress-legislators/committees-current.yaml"):
    committees[c["thomas_id"]] = c
    if "house_committee_id" in c: committees[c["house_committee_id"] + "00"] = c
    c["subcommittees"] = dict((s["thomas_id"], s) for s in c.get("subcommittees", []))

  for chamber in ("house", "senate"):
    # Load any existing meetings file so we can recycle GUIDs generated for Senate meetings.
    existing_meetings = []
    output_file = utils.data_dir() + "/committee_meetings_%s.json" % chamber
    if os.path.exists(output_file):
      existing_meetings = json.load(open(output_file))

    # Scrape for meeting info.
    if chamber == "senate":
      meetings = fetch_senate_committee_meetings(existing_meetings, committees, options)
    else:
      meetings = fetch_house_committee_meetings(existing_meetings, committees, options)

    # Write out.
    utils.write(json.dumps(meetings, sort_keys=True, indent=2, default=utils.format_datetime),
      output_file)
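
As a reading aid for the mapping that the first loop in the function above builds, this is the shape committees ends up with for a single committee. The values are invented; only the structure mirrors the code (the subcommittees list from the YAML is replaced by a dict keyed on thomas_id, and the same committee dict is reachable by both its thomas_id and its house_committee_id + "00"):

# Hypothetical entry; real data comes from congress-legislators/committees-current.yaml.
committee = {
    "thomas_id": "HSAG",
    "house_committee_id": "AG",
    "subcommittees": {"15": {"thomas_id": "15"}},  # dict keyed on subcommittee thomas_id
}
committees = {
    "HSAG": committee,  # looked up by thomas_id
    "AG00": committee,  # and by house_committee_id + "00" (same dict object)
}
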
Example #11
def process_bill(bill_id, options):
    fdsys_xml_path = _path_to_billstatus_file(bill_id)
    logging.info("[%s] Processing %s..." % (bill_id, fdsys_xml_path))

    # Read FDSys bulk data file.
    xml_as_dict = read_fdsys_bulk_bill_status_file(fdsys_xml_path, bill_id)
    bill_data = form_bill_json_dict(xml_as_dict)

    # Convert and write out data.json and data.xml.
    utils.write(
        unicode(json.dumps(bill_data, indent=2, sort_keys=True)),
        os.path.dirname(fdsys_xml_path) + '/data.json')

    from bill_info import create_govtrack_xml
    with open(os.path.dirname(fdsys_xml_path) + '/data.xml', 'wb') as xml_file:
        xml_file.write(create_govtrack_xml(bill_data, options))

    if options.get("amendments", True):
        process_amendments(bill_id, xml_as_dict, options)

    # Mark this bulk data file as processed by saving its lastmod
    # file under a new path.
    utils.write(
        utils.read(_path_to_billstatus_file(bill_id).replace(".xml", "-lastmod.txt")),
        os.path.join(os.path.dirname(fdsys_xml_path), "data-fromfdsys-lastmod.txt"))

    return {
        "ok": True,
        "saved": True,
    }
Example #12
def write_report(report):
  data_path = "%s/%s/%s/report.json" % (report['inspector'], report['year'], report['report_id'])
  utils.write(
    utils.json_for(report),
    "%s/%s" % (utils.data_dir(), data_path)
  )
  return data_path
Example #13
def write_bill_version_metadata(bill_version_id):
  bill_type, number, congress, version_code = utils.split_bill_version_id(bill_version_id)

  bill_version = {
    'bill_version_id': bill_version_id,
    'version_code': version_code,
    'urls': { },
  }

  mods_ns = {"mods": "http://www.loc.gov/mods/v3"}
  doc = etree.parse(document_filename_for(bill_version_id, "mods.xml"))
  locations = doc.xpath("//mods:location/mods:url", namespaces=mods_ns)

  for location in locations:
    label = location.attrib['displayLabel']
    if "HTML" in label:
      format = "html"
    elif "PDF" in label:
      format = "pdf"
    elif "XML" in label:
      format = "xml"
    else:
      format = "unknown"
    bill_version["urls"][format] = location.text

  bill_version["issued_on"] = doc.xpath("string(//mods:dateIssued)", namespaces=mods_ns)

  utils.write(
    json.dumps(bill_version, sort_keys=True, indent=2, default=utils.format_datetime), 
    output_for_bill_version(bill_version_id)
  )

  return {'ok': True, 'saved': True}
Example #14
def write_bill_catoxml(bill_version_id, options):
  utils.write(
    extract_xml_from_json(fetch_single_bill_json(bill_version_id)),
    document_filename_for(bill_version_id, "catoxml.xml")
  )

  return {'ok': True, 'saved': True}
Example #15
def trade_reciprocity(years,resource):
  corrmeans = []
  for year in years:
    G = get_graph(year,resource)
    corrcoeffs = []
    xs, ys = [], []
    for country in G.nodes():
      for e in G.edges(country):
        try:
          [x1,y1] = [G[e[0]][e[1]],G[e[1]][e[0]]]
          #print [x1,y1]
          xs.append(x1['weight'])
          ys.append(y1['weight'])
        except KeyError:
          pass  # no reciprocal edge for this pair of countries
    if len(xs)>1:
      cc = np.corrcoef([xs,ys])
      corrcoeffs.append(cc[0][1])
    #print corrcoeffs
    corrmeans.append(np.mean(corrcoeffs))
    print [year,np.mean(corrcoeffs)]
  write({'means':corrmeans, 'years':years},get_results_directory(resource),'meanReciprocityCorrelation')
  plt.clf()
  plt.plot(years,corrmeans)
  plt.title('Mean Correlation of Import/Export By Year')
  plt.xlabel('Year')
  plt.ylabel('Mean Correlation of Import/Export')
  directory = get_images_directory(resource)
  plt.savefig(directory+'meanReciprocityCorrelation.png')
  plt.clf()
  return 0
Example #16
def fetch_version(bill_version_id, options):
  logging.info("\n[%s] Fetching..." % bill_version_id)
  
  bill_type, number, congress, version_code = utils.split_bill_version_id(bill_version_id)
  # bill_id = "%s%s-%s" % (bill_type, number, congress)

  mods_filename = filename_for(bill_version_id)
  mods_cache = version_cache_for(bill_version_id, "mods.xml")
  issued_on, urls = fdsys.document_info_for(mods_filename, mods_cache, options)
  
  bill_version = {
    'issued_on': issued_on,
    'urls': urls,
    'version_code': version_code,
    'bill_version_id': bill_version_id
  }

  # 'bill_version_id': bill_version_id,
  #   'version_code': version_code

  utils.write(
    json.dumps(bill_version, sort_keys=True, indent=2, default=utils.format_datetime), 
    output_for_bill_version(bill_version_id)
  )

  return {'ok': True, 'saved': True}
Example #17
def output_nomination(nomination, options):
    logging.info("[%s] Writing to disk..." % nomination['nomination_id'])

    # output JSON - so easy!
    utils.write(
        json.dumps(nomination, sort_keys=True, indent=2, default=utils.format_datetime),
        output_for_nomination(nomination['nomination_id'], "json")
    )
Example #18
    def __write_templates(self, project_name, dir_name):
        """
        Generate the upstart and startup scripts for the project
        """
        upstart = self.__generate_project_template(project_name, 'upstart_template')
        startup = self.__generate_project_template(project_name, 'startup_template')
        write('%s/upstart.conf' % dir_name, upstart)
        write('%s/startup.sh' % dir_name, startup)
Example #19
def get_sitemap(year, collection, lastmod, options):
  """Gets a single sitemap, downloading it if the sitemap has changed.
  
  Downloads the root sitemap (year==None, collection==None), or
  the sitemap for a year (collection==None), or the sitemap for
  a particular year and collection. Pass lastmod which is the current
  modification time of the file according to its parent sitemap, which
  is how it knows to return a cached copy.
  
  Returns the sitemap parsed into a DOM.
  """
  
  # Construct the URL and the path to where to cache the file on disk.
  if year is None:
    url = "http://www.gpo.gov/smap/fdsys/sitemap.xml"
    path = "fdsys/sitemap/sitemap.xml"
  elif collection is None:
    url = "http://www.gpo.gov/smap/fdsys/sitemap_%s/sitemap_%s.xml" % (year, year)
    path = "fdsys/sitemap/%s/sitemap.xml" % year
  else:
    url = "http://www.gpo.gov/smap/fdsys/sitemap_%s/%s_%s_sitemap.xml" % (year, year, collection)
    path = "fdsys/sitemap/%s/%s.xml" % (year, collection)
    
  # Should we re-download the file?
  lastmod_cache_file = utils.cache_dir() + "/" + path.replace(".xml", "-lastmod.txt")
  if options.get("cached", False):
    # If --cached is used, don't hit the network.
    force = False
  elif not lastmod:
    # No *current* lastmod date is known for this file (because it is the master
    # sitemap file, probably), so always download.
    force = True
  else:
    # If the file is out of date or --force is used, download the file.
    cache_lastmod = utils.read(lastmod_cache_file)
    force = (lastmod != cache_lastmod) or options.get("force", False)
    
  if force:
    logging.warn("Downloading: %s" % url)
    
  body = utils.download(url, path, utils.merge(options, {
    'force': force, 
    'binary': True
  }))
  
  if not body:
      raise Exception("Failed to download %s" % url)
      
  # Write the current last modified date to disk so we know the next time whether
  # we need to fetch the file.
  if lastmod and not options.get("cached", False):
    utils.write(lastmod, lastmod_cache_file)
  
  try:
    return etree.fromstring(body)
  except etree.XMLSyntaxError as e:
    raise Exception("XML syntax error in %s: %s" % (url, str(e)))
Example #20
def write_bill_catoxml(bill_version_id, options):
    catoxml_filename = catoxml_filename_for(bill_version_id)

    utils.write(
        extract_xml_from_json(fetch_single_bill_json(bill_version_id)),
        catoxml_filename
    )

    return {"ok": True, "saved": True}
Example #21
def save_bill_search_state(saved_bills, search_state):
    # For --fast mode, cache the current search result listing (in search_state)
    # to disk so we can detect major changes to the bill through the search
    # listing rather than having to parse the bill.
    for bill_id in saved_bills:
        if bill_id in search_state:
            fast_cache_path = utils.cache_dir() + "/" + bill_info.bill_cache_for(bill_id, "search_result.html")
            new_state = search_state[bill_id]
            utils.write(new_state, fast_cache_path)
Example #22
def save_meta_result(result):
  path = meta_path_for(result['type'], result['agency'], result['year'], result['id'])

  # for paged metadata, don't overwrite if we've got it already,
  # we don't keep anything that should change.
  if os.path.exists(path):
    logging.debug("[%s][%s] Knew about it, skipping." % (result['id'], result['type']))
  else:
    logging.warn("[%s][%s] Newly discovered, saving metadata." % (result['id'], result['type']))
    utils.write(utils.json_for(result), path)
Example #23
def Write(target, source, env):
    # we don't use target and source as usual : we may apply several times this
    # builder on the same source/target (or the source may be the target), 
    # that's not possible for scons
    files, contents = WriteArgs(target, source, env)

    for f, c in zip(files, contents):
        utils.write(c, f)

    return None
Example #24
def save(url, nzb_path):
    file, e = _load_nzb(url)
    if e is None:
        try:
            utils.write(nzb_path, file, 'wb')
        except:
            e = "Pneumatic failed writing %s" % nzb_path
        else:
            e = "Pneumatic saved %s" % nzb_path
    print e
    return
Example #25
def mirror_file(year, collection, package_name, lastmod, granule_name, file_types, options):
  # Where should we store the file?
  path = get_output_path(year, collection, package_name, granule_name, options)
  if not path: return # should skip
  
  # Do we need to update this record?
  lastmod_cache_file = path + "/lastmod.txt"
  cache_lastmod = utils.read(lastmod_cache_file)
  force = ((lastmod != cache_lastmod) or options.get("force", False)) and not options.get("cached", False)
  
  # Try downloading files for each file type.
  targets = get_package_files(package_name, granule_name, path)
  updated_file_types = set()
  for file_type in file_types:
    if file_type not in targets: raise Exception("Invalid file type: %s" % file_type)
    f_url, f_path = targets[file_type]
    
    if (not force) and os.path.exists(f_path): continue # we already have the current file
    logging.warn("Downloading: " + f_path)
    data = utils.download(f_url, f_path, utils.merge(options, {
      'binary': True, 
      'force': force, 
      'to_cache': False,
      'needs_content': file_type == "text" and f_path.endswith(".html"),
    }))
    updated_file_types.add(file_type)
    
    if not data:
      if file_type == "pdf":
        # expected to be present for all packages
        raise Exception("Failed to download %s" % package_name)
      else:
        # not all packages have all file types, but assume this is OK
        logging.error("file not found: " + f_url)
        continue
    
    if file_type == "text" and f_path.endswith(".html"):
      # The "text" format files are put in an HTML container. Unwrap it into a .txt file.
      # TODO: Encoding? The HTTP content-type header says UTF-8, but do we trust it?
      #       html.fromstring does auto-detection.
      with open(f_path[0:-4] + "txt", "w") as f:
        text_content = unicode(html.fromstring(data).text_content())
        f.write(text_content.encode("utf8"))
        
  if collection == "BILLS" and "mods" in updated_file_types:
    # When we download bill files, also create the text-versions/data.json file
    # which extracts commonly used components of the MODS XML.
    from bill_versions import write_bill_version_metadata
    write_bill_version_metadata(get_bill_id_for_package(package_name, with_version=True))

  # Write the current last modified date to disk so we know the next time whether
  # we need to fetch the files for this sitemap item.
  if lastmod and not options.get("cached", False):
    utils.write(lastmod, lastmod_cache_file) 
Example #26
def get_edges():
    data = json.load(open(os.getcwd() + "/data/api/reports/by_petition.json", 'r'))
    keys = sorted(data.keys())
    edges ={}
    
    for (x,y) in combinations(keys, 2):
        if (x > y):
            x,y = y,x
        edges[x + "_" + y] = set(data[x]).intersection(set(data[y]))

    write(json.dumps(data, indent=2), "api/reports/edges.json")        
Example #27
def run(options):

  for_the_week = get_monday_week(options.get('for_the_week', None)) #yyyymmdd

  logging.info('Scraping upcoming bills from docs.house.gov/floor for the week %s.' % for_the_week)
  
  # Parse the content into upcoming_bills
  upcoming_bills = fetch_bills_week(for_the_week, options)

  # Write the json to data folder
  output_file = utils.data_dir() + "/upcoming_bills_%s.json" % for_the_week
  utils.write(json.dumps(upcoming_bills, sort_keys=True, indent=2, default=utils.format_datetime), output_file)
Example #28
def run(options):
  # accepts yyyymmdd format
  for_the_week = get_monday_of_week(options.get('week_of', None))

  logging.warn('Scraping upcoming bills from docs.house.gov/floor for the week of %s.\n' % for_the_week)
  house_floor = fetch_floor_week(for_the_week, options)

  output_file = "%s/upcoming_house_floor/%s.json" % (utils.data_dir(), for_the_week)
  output = json.dumps(house_floor, sort_keys=True, indent=2, default=utils.format_datetime)
  utils.write(output, output_file)

  logging.warn("\nFound %i bills for the week of %s, written to %s" % (len(house_floor['upcoming_bills']), for_the_week, output_file))
Example #29
def combine(options):
    alld  = {}
    files = [x for x in os.listdir(os.getcwd() + "/data/") if re.sub(r"\d+\.json", "", x) == ""]
    ignore = options.get("ignore", [])
    for file in files:
        sermon = json.load(open(os.getcwd() + "/data/" + file, 'r'))        
        for i in ignore:
            if i in sermon:
                sermon.pop(i)
        alld[sermon['uid']] = sermon
    write(json.dumps(alld, indent=2, sort_keys=True), os.getcwd() + "/data/all.json")
    write(json.dumps(alld, sort_keys=True), os.getcwd() + "/data/all.min.json")
Example #30
def run_for_week(for_the_week, options):
    logging.info('Scraping upcoming bills from docs.house.gov/floor for the week of %s...' % for_the_week)
    house_floor = fetch_floor_week(for_the_week, options)
    if house_floor is None:
        logging.warn("Nothing posted for the week of %s" % for_the_week)
        return

    output_file = "%s/upcoming_house_floor/%s.json" % (utils.data_dir(), for_the_week)
    output = json.dumps(house_floor, sort_keys=True, indent=2, default=utils.format_datetime)
    utils.write(output, output_file)

    logging.warn("Found %i bills for the week of %s, written to %s" % (len(house_floor['upcoming']), for_the_week, output_file))
Example #31
def main():
    data = read('data/data.json')
    tweets = read('data/tweets.json')

    tf = {}
    idf = {}

    inverted_index = {}
    thread_count = sum(len(rumour) for rumour in data.values())

    for rumour_name, rumour in data.items():
        for thread_id, thread in rumour.items():
            tweets = [thread['source']] + list(
                thread.get('replies', dict()).values())
            word_index = {}
            word_count = 0

            for tweet in tweets:
                text = tweet['text']
                words = [word.lower() for word in re.findall(r"[\w#@']+", text)]

                for word in words:
                    word_index[word] = word_index.get(word, 0) + 1

                word_count += len(words)

            for word, count in word_index.items():
                tf[word] = tf.get(word, dict())
                tf[word][thread_id] = count / float(word_count)
                inverted_index[word] = inverted_index.get(word, dict())
                inverted_index[word][thread_id] = 1

    for word, thread_dict in inverted_index.items():
        idf[word] = math.log(thread_count / float(len(thread_dict)))

    write('data/tfidf.json', {
        "tf": tf,
        "idf": idf,
        "inverted_index": inverted_index
    })
Example #32
    def train(self, batches):
        loss_total = 0.
        crr = 0.
        true_total = 0.
        pred_total = 0.

        start = time.time()
        batch_indices = range(len(batches))
        np.random.shuffle(batch_indices)

        self.model.feat_layer.is_train.set_value(1)
        for index, b_index in enumerate(batch_indices):
            if (index + 1) % 100 == 0:
                print '%d' % (index + 1),
                sys.stdout.flush()

            batch = batches[b_index]
            loss_i, crr_i, true_i, pred_i = self.train_func(*batch)

            loss_total += loss_i
            crr += crr_i
            true_total += true_i
            pred_total += pred_i

        avg_loss = loss_total / float(len(batches))
        precision = crr / pred_total if pred_total > 0 else 0.
        recall = crr / true_total if true_total > 0 else 0.
        f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.

        write('\n\tTime: %f seconds' % (time.time() - start))
        write('\tAverage Negative Log Likelihood: %f' % avg_loss)
        write('\tLabel: F1:%f\tP:%f(%d/%d)\tR:%f(%d/%d)' % (f1, precision, crr, pred_total, recall, crr, true_total))
Example #33
def create_preload_list():
    preload_json = None

    if PRELOAD_CACHE and os.path.exists(PRELOAD_CACHE):
        logging.debug("Using cached Chrome preload list.")
        preload_json = json.loads(open(PRELOAD_CACHE).read())
    else:
        logging.debug("Fetching Chrome preload list from source...")

        # Downloads the chromium preloaded domain list and sets it to a global set
        file_url = 'https://chromium.googlesource.com/chromium/src/net/+/master/http/transport_security_state_static.json?format=TEXT'

        # TODO: proper try/except around this network request
        request = requests.get(file_url)
        raw = request.content

        # To avoid parsing the contents of the file out of the source tree viewer's
        # HTML, we download it as a raw file. googlesource.com Base64-encodes the
        # file to avoid potential content injection issues, so we need to decode it
        # before using it. https://code.google.com/p/gitiles/issues/detail?id=7
        raw = base64.b64decode(raw).decode('utf-8')

        # The .json file contains '//' comments, which are not actually valid JSON,
        # and confuse Python's JSON decoder. Begone, foul comments!
        raw = ''.join(
            [re.sub(r'^\s*//.*$', '', line) for line in raw.splitlines()])

        preload_json = json.loads(raw)

        if PRELOAD_CACHE:
            logging.debug("Caching preload list at %s" % PRELOAD_CACHE)
            utils.write(utils.json_for(preload_json), PRELOAD_CACHE)

    # For our purposes, we only care about entries that includeSubDomains
    fully_preloaded = []
    for entry in preload_json['entries']:
        if entry.get('include_subdomains', False) is True:
            fully_preloaded.append(entry['name'])

    return fully_preloaded
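
The comment-stripping step above exists because Chromium's transport_security_state_static.json contains // comments, which json.loads() rejects. A self-contained illustration of the same re.sub() pass on a made-up two-entry document (not real preload data):

import json
import re

raw = """
// a comment the JSON parser would choke on
{
  "entries": [
    // another comment
    {"name": "example.com", "include_subdomains": true}
  ]
}
"""
stripped = ''.join(re.sub(r'^\s*//.*$', '', line) for line in raw.splitlines())
print(json.loads(stripped)["entries"][0]["name"])  # example.com
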
Example #34
def divide_train_dev(tweets):
    train_categories = read(TRAIN)
    dev_categories = read(DEV)
    train = []
    dev = []

    for tweet in tweets:
        if tweet.get('reply_to'):
            el = {
                'text': tweet['text'],
                'reply_to': tweet['reply_to']
            }

            if tweet['id'] in train_categories:
                el['group'] = train_categories[tweet['id']]
                train += [el]
                # train += [{
                #     'text': tweet['text'],
                #     'reply_to': tweet['reply_to'],
                #     'group': train_categories[tweet['id']]
                # }]
            else:
                el['group'] = dev_categories[tweet['id']]
                dev += [el]
                # dev += [{
                #     'text': tweet['text'],
                #     'reply_to': tweet['reply_to'],
                #     'group': dev_categories[tweet['id']]
                # }]
                # all += [el]

    write('data/train.json', train)
    write('data/dev.json', dev)
    write('data/groups.json', dict(train_categories.items() | dev_categories.items()))
Example #35
    def predict(self, batches):
        """
        :param batches: 1D: n_batches, 2D: n_words; elem=(x_w, x_m)
        :return: y: 1D: n_batches, 2D: batch_size; elem=(y_pred(1D:n_words), y_proba(float))
        """
        start = time.time()
        y = []

        self.model.feat_layer.is_train.set_value(0)
        for index, inputs in enumerate(batches):
            if (index + 1) % 1000 == 0:
                print '%d' % (index + 1),
                sys.stdout.flush()

            if len(inputs) == 0:
                y_pred = []
            else:
                y_pred = self.pred_func(*inputs)
            y.append(y_pred)

        write('\n\tTime: %f seconds' % (time.time() - start))
        return y
Example #36
def main():
    args = docopt.docopt(__doc__, version='v0.0.1')
    utils.configure_logging(args['--debug'])

    out_file = args['--output']

    # Read from a .csv, or allow domains on the command line.
    domains = []
    if args['INPUT'][0].endswith(".csv"):
        domains = utils.load_domains(args['INPUT'][0])
    else:
        domains = args['INPUT']

    # If the user wants to sort them, sort them in place.
    if args['--sorted']:
        domains.sort()

    options = {
        'user_agent': args['--user-agent'],
        'timeout': args['--timeout'],
        'preload_cache': args['--preload-cache'],
        'cache': args['--cache']
    }
    results = pshtt.inspect_domains(domains, options)

    # JSON can go to STDOUT, or to a file.
    if args['--json']:
        output = utils.json_for(results)
        if out_file is None:
            print(output)
        else:
            utils.write(output, out_file)
            logging.warn("Wrote results to %s." % out_file)
    # CSV always goes to a file.
    else:
        if args['--output'] is None:
            out_file = 'results.csv'
        pshtt.csv_for(results, out_file)
        logging.warn("Wrote results to %s." % out_file)
Example #37
    def report_rates_on_epoch(self, label: str, epno: int,
                              batch_results: BatchResult,
                              report_params: ReportParameters) -> None:
        report_str = 'Total #preds: {}\n'.format(batch_results.total_weight)

        true_pred = batch_results.weighted_true_preds
        false_miss = batch_results.weighted_n_labs - batch_results.weighted_true_preds
        false_pred = batch_results.weighted_n_preds - batch_results.weighted_true_preds
        # true_miss = (batch_results.total_weight - batch_results.weighted_n_labs) - false_pred

        report_for_i = lambda i: list(
            map(lambda x: x[i], (report_params.label_name_map, true_pred,
                                 false_miss, false_pred)))
        report_str += tabulate(
            list(map(report_for_i, range(report_params.top_k))),
            headers=['Label', '#Correct', '#Missed', '#Falsely Predicted'])
        report_str += '\n'

        utils.write(
            report_str,
            os.path.join('reports', utils.get_time_str(),
                         'epoch_{}_{}'.format(epno, label)))
Example #38
def _prepare(language):
    wikipron = load_wikipron(language)
    # Preprocess
    preprocessor = Preprocessor(language)
    wikipron["preprocessed"] = wikipron["phonemes"].apply(preprocessor.preprocess)
    # Filter
    wikipron = wikipron[
        wikipron["preprocessed"].str.split(" ").apply(len) > 3
    ]  # at least two phonemes
    # Split
    train, valid = train_test_split(
        wikipron, train_size=0.9, random_state=SEED, shuffle=True
    )
    # Order train
    train = train.sample(frac=1)
    train["length"] = train["preprocessed"].str.len()
    train.sort_values(by=["frequency", "length"], ascending=(False, True), inplace=True)
    # Write
    filename = f"data/phoneme/wikipron/{language}/train.txt"
    write(train["preprocessed"], filename)
    filename = f"data/phoneme/wikipron/{language}/validation.txt"
    write(valid["preprocessed"], filename)
Example #39
def main():

    configs = utils.loadConfigs()

    # create and initialize analysis object
    analyzer = analysis.Analysis("USDT_BTC", 300, 500, configs)
    analyzer.setup()

    # create and initialize transactor (intially holding btc)
    trans_que = queue.Queue()
    trans = transactor.TransactorThread("USDT_BTC", trans_que, True, configs)
    trans.start()

    utils.sendMsg("trader started")
    utils.write("trader started")
    while True:
        if (analyzer.update()):
            val = analyzer.analyze()
            trans_que.put(val)
        time.sleep(60)

    trans.join()
Example #40
def index(options=None):
    options = options or {}
    concordance = defaultdict(list)
    files = [x for x in os.listdir(os.getcwd() + "/data/") if re.sub(r"\d+\.json", "", x) == ""]
    if options.get('limit', False):
        files = files[:options.get('limit')]

    for file in files:
        sermon = json.load(open(os.getcwd() + "/data/" + file, 'r'))
        words = uniq(re.findall(r"\b[A-z]+\b", sermon['opening'].replace('\n', ' ').lower()))
                
        '''
        if options.get("uniques", False):
            words = uniq(re.findall(r"\b[A-z]+\b", sermon['opening'].replace('\n', ' ').lower()))
        else:
            words = re.findall(r"\b[A-z]+\b", sermon['opening'].replace('\n', ' ').lower())
        '''
        for word in words:
            if len(word) > 2:
                concordance[word].append(file.replace('.json', ''))
                
    write(json.dumps(concordance, sort_keys=True, indent=2), os.getcwd() + "/src/data/index.json")
    write(json.dumps(concordance, sort_keys=True), os.getcwd() + "/src/data/index.min.json")
Example #41
def get_petitions(mx=-1, offset=0):
    limit = 100
    stop = False
    petitions = []

    while not stop:
        data = fetch_petitions(offset, limit)
        if "results" not in data or len(data["results"]) == 0:
            stop = True
            continue
        petitions += data["results"]
        if mx > -1 and len(petitions) > mx:
            petitions = petitions[:mx]
            stop = True

        offset += limit

    for petition in petitions:
        write(json.dumps(petition, indent=2),
              "api/petitions/" + petition['id'] + ".json")

    return petitions
Example #42
def mirror_bulkdata_file(sitemap, url, item_path, lastmod, options):
    # Where should we store the file?
    path = "%s/fdsys/%s/%s" % (utils.data_dir(), sitemap["collection"], item_path)

    # For BILLSTATUS, store this along with where we store the rest of bill
    # status data.
    if sitemap["collection"] == "BILLSTATUS":
        from bills import output_for_bill
        bill_id, version_code = get_bill_id_for_package(os.path.splitext(os.path.basename(item_path))[0], with_version=False)
        path = output_for_bill(bill_id, FDSYS_BILLSTATUS_FILENAME, is_data_dot=False)

    # Where should we store the lastmod found in the sitemap so that
    # we can tell later if the file has changed?
    lastmod_cache_file = os.path.splitext(path)[0] + "-lastmod.txt"

    # Do we already have this file up to date?
    if os.path.exists(lastmod_cache_file) and not options.get("force", False):
        if lastmod == utils.read(lastmod_cache_file):
            return

    # With --cached, skip if the file is already downloaded.
    if os.path.exists(path) and options.get("cached", False):
        return

    # Download.
    logging.warn("Downloading: " + path)
    data = utils.download(url, path, utils.merge(options, {
        'binary': True,
        'force': True, # decision to cache was made above
        'to_cache': False,
    }))
    if not data:
        # Something failed.
        return

    # Write the current last modified date back to disk so we know the next time whether
    # we need to fetch the file again.
    utils.write(lastmod, lastmod_cache_file)
Example #43
    def set_train_func(self):
        write('\nBuilding an lp train func...')
        y_label = T.imatrix('y')

        label_proba = self.model.calc_label_proba(self.model.inputs)
        label_pred = self.model.argmax_label_proba(label_proba)
        true_label_path_score = self.model.calc_label_path_score(label_proba, y_label)

        cost = - T.mean(true_label_path_score) + L2Regularizer()(alpha=self.argv.reg, params=self.model.params)
        grads = T.grad(cost=cost, wrt=self.model.params)
        self.optimizer = get_optimizer(argv=self.argv)
        updates = self.optimizer(grads=grads, params=self.model.params)

        self.train_func = theano.function(
            inputs=self.model.inputs + [y_label],
            outputs=[cost,
                     categorical_accuracy(y_true=y_label, y_pred=label_pred),
                     label_pred.flatten(),
                     y_label.flatten()
                     ],
            updates=updates,
            mode='FAST_RUN'
        )
Example #44
def main():
    data = read(REPLIES)
    negative = [line.rstrip('\n') for line in open(NEGATIVE_LEXICON)]
    positive = [line.rstrip('\n') for line in open(POSITIVE_LEXICON)]
    result = {}
    for tweet in data:
        if "reply_to" in tweet:
            tweet_id = tweet['id']
            text = tweet['text']
            vector = list()
            in_reply_to = tweet['reply_to']
            vector.append(contains_original(text, in_reply_to))
            vector.append(opinion_words_count(text, positive))
            vector.append(opinion_words_count(text, negative))
            if "tags" in tweet:
                tags = tweet['tags']
                vector += reversed_word_order(tags)
            vector.append(contains_question_mark(text))
            result[tweet_id] = {
                'rumour': tweet['rumour'],
                'vector': vector
            }
    write(OUTFILE, result)
Example #45
def createSQLite3DB(filename, schemapath, override):
    cmd = "sqlite3 {} < {}".format(filename,
                                   os.path.join(schemapath, "sqlite3.sql"))
    write("+ Create SQLite3 database at '{}'".format(filename))

    if os.path.exists(filename) and not override:
        print("+ Database file '{}' exists".format(filename))
        return False

    path = os.path.dirname(filename)
    if not os.path.exists(path):
        try:
            os.makedirs(path)
        except OSError as err:
            print("+ Could not create directory '{}': {} ".format(path, err))
            return False

    res = execute(cmd)
    if res.error:
        print(" + {}".format(res.error))
        return False

    return True
Example #46
def gen_operators(out, autograd_functions):
    all_operators_declarations = []
    all_operators_defines = []
    for func in autograd_functions:
        if ("namespace" in func["declaration"]["method_of"]
                and func["declaration"]["inplace"] == False):
            declr = gen_operator_declaration(func)
            defn = gen_operator_define(func)

            all_operators_declarations.append(
                OPERATOR_DECLARATION.substitute(declr))
            all_operators_defines.append(OPERATOR_DEFINE.substitute(defn))
        else:
            ## TODO
            pass

    top_env = {
        "auto_operator_declarations": all_operators_declarations,
        "auto_operator_defines": all_operators_defines,
    }

    write(out, "express_operator.h", OPERATORS_H, top_env)
    write(out, "express_operator.cpp", OPERATORS_CPP, top_env)
Example #47
def run(options):
    # accepts yyyymmdd format
    given_week = options.get('week_of', None)
    if given_week is None:
        for_the_week = get_latest_monday(options)
    else:
        for_the_week = get_monday_of_week(given_week)

    logging.warn(
        'Scraping upcoming bills from docs.house.gov/floor for the week of %s.\n'
        % for_the_week)
    house_floor = fetch_floor_week(for_the_week, options)

    output_file = "%s/upcoming_house_floor/%s.json" % (utils.data_dir(),
                                                       for_the_week)
    output = json.dumps(house_floor,
                        sort_keys=True,
                        indent=2,
                        default=utils.format_datetime)
    utils.write(output, output_file)

    logging.warn("\nFound %i bills for the week of %s, written to %s" %
                 (len(house_floor['upcoming']), for_the_week, output_file))
Example #48
def extract_bill_version_metadata(package_name, text_path):
    bill_version_id = get_bill_id_for_package(package_name)

    bill_type, number, congress, version_code = utils.split_bill_version_id(
        bill_version_id)

    bill_version = {
        'bill_version_id': bill_version_id,
        'version_code': version_code,
        'urls': {},
    }

    mods_ns = {"mods": "http://www.loc.gov/mods/v3"}
    doc = etree.parse(os.path.join(text_path, "mods.xml"))
    locations = doc.xpath("//mods:location/mods:url", namespaces=mods_ns)

    for location in locations:
        label = location.attrib['displayLabel']
        if "HTML" in label:
            format = "html"
        elif "PDF" in label:
            format = "pdf"
        elif "XML" in label:
            format = "xml"
        else:
            format = "unknown"
        bill_version["urls"][format] = location.text

    bill_version["issued_on"] = doc.xpath("string(//mods:dateIssued)",
                                          namespaces=mods_ns)

    utils.write(
        json.dumps(bill_version,
                   sort_keys=True,
                   indent=2,
                   default=utils.format_datetime),
        output_for_bill_version(bill_version_id))
Example #49
def gen_rpcs(out, autograd_functions):
    all_rpc_defines = []
    all_rpc_binds = []

    all_names = {}

    for func in autograd_functions:
        if ("namespace" in func["declaration"]["method_of"]
                and func["declaration"]["inplace"] == False):
            name = func["declaration"]["api_name"]
            if (name not in all_names):
                all_names[name] = 0
            else:
                all_names[name] = all_names[name] + 1
                name = name + str(all_names[name])
            bind = gen_rpc_bind(func)
            bind["api_name"] = name
            all_rpc_binds.append(RPC_BIND.substitute(bind))

            define = gen_rpc_define(func)
            define["api_name"] = name
            all_rpc_defines.append(RPC_DEFINE.substitute(define))

        elif ("Tensor" in func["declaration"]["method_of"]
              and func["declaration"]["inplace"] == True):
            ## TODO
            pass
        else:
            pass

    top_env = {
        "auto_rpc_binds": all_rpc_binds,
        "auto_rpc_defines": all_rpc_defines
    }

    write(out, "express_rpc.h", RPC_H, top_env)
    write(out, "express_rpc.cpp", RPC_CPP, top_env)
Example #50
    def _count_batches(self, train_samples, dev_samples):
        write('\n\tMaking Batches...')
        train_batches = self.preprocessor.make_batches(samples=train_samples)
        if dev_samples:
            dev_batches = self.preprocessor.make_batches(samples=dev_samples)
        else:
            dev_batches = []
        write('\t- Train Batches: %d' % len(train_batches))
        write('\t- Dev   Batches: %d' % len(dev_batches))
Example #51
    def run(self):
        log = {}

        una, miss, cta = clt.find_missing(self.cfg.move_csv,
                                          self.cfg.video_csv,
                                          self.cfg.output_dir)

        log['missing'] = {
            'unavailable': len(una),
            'missing': len(miss),
            'call_to_action': len(cta)
        }

        miss.to_csv(os.path.join(self.cfg.output_dir, 'missing.csv'), sep='\t')
        una, found = clt.collect(miss, self.cfg.video_dst, self.cfg.output_dir)

        log['collect'] = {'unavailable': len(una), 'found': len(found)}

        update_path = os.path.join(self.cfg.output_dir, 'updated.csv')
        updated, err = clt.update_videos(self.cfg.move_csv, self.cfg.video_csv,
                                         found, self.cfg.video_src,
                                         update_path)
        write('no_videos_clt.txt', err)
        write('collect_videos.json', log)
Example #52
def gen_parts(p_size, msg, n, k):
    if n < k:
        print('ERROR: the number of shares n={} is less than k={}'.format(n, k))
        return

    with open(msg, 'rb') as f:
        msg = bytearray(f.read())

    part_size = ceil(len(msg) / k)
    q = [
        int.from_bytes(msg[i * part_size:(i + 1) * part_size], byteorder='big')
        for i in range(k)
    ]

    p_size_max = max(qi.bit_length() for qi in q) + 1
    if p_size_max > p_size:
        p_size = p_size_max + 1
        print('LOG: the bit length of the modulus p was changed to {}'.format(p_size))
    p = gen_prime(p_size)
    mat = gen_mat(p, n, k, q)

    write('p.txt', p)
    for idx, part in enumerate(mat):
        write('part_{}.txt'.format(idx + 1), part)
Example #53
def main(args):
    print(args)
    index_file = "stem_{}_stop_{}_inverted_index.txt".format(
        args.isstemmed, args.isstopped)
    queries = utils.load_queries(utils.PARSED_QUERIES)

    if args.isstemmed:
        queries = utils.load_queries(utils.STEM_QUERIES)

    index = utils.load_inverted_index(os.path.join(utils.INDEX_DIR,
                                                   index_file))
    stats = utils.load_corpus_stats()

    obj = BM25(args, index, stats, queries[49:54])
    obj.compute_scores()

    file_name = "stem_{}_stop_{}_bm25_score.csv".format(
        args.isstemmed, args.isstopped)
    file_path = os.path.join(utils.RESULT_DIR, "bm25", file_name)
    utils.write(obj.log, file_path, obj.bm25_scores, csvf=True)
    file_name2 = "stem_{}_stop_{}_bm25_score.json".format(
        args.isstemmed, args.isstopped)
    file_path2 = os.path.join(utils.RESULT_DIR, "bm25", file_name2)
    utils.write(obj.log, file_path2, obj.bm25_scores)
Example #54
def make_show_list(main_data):
    """
    Renders the show list page
    :param main_data: the main TemplateData instance
    :return: empty
    """
    show_template_data = deepcopy(main_data)
    show_summaries = []

    for year in get_defined_years():
        for season in reversed(seasons):
            yaml_path = path.join(utils.root, 'site', year, season,
                                  'show.yaml')
            if not path.isfile(yaml_path):
                continue

            show_data = load_or_die(yaml_path)
            graphic = get_show_graphic(year, season)
            is_current = year == current_year and season == current_season
            show_data.update({
                'year': year,
                'season': season,
                'graphic': graphic,
                'is_current': is_current
            })

            show_template_data.bind('show', show_data)
            show_summaries.append(
                compiled_summary_template.evaluate(show_template_data))

    show_list_data = deepcopy(main_data)
    show_list_data.bind('show_list', show_summaries)

    write(show_list_data, 'MTG - Show List',
          compiled_show_list_template.evaluate(show_list_data), 'site',
          'show_list.html')
Example #55
def run(options):
    # Load the committee metadata from the congress-legislators repository and make a
    # mapping from thomas_id and house_id to the committee dict. For each committee,
    # replace the subcommittees list with a dict from thomas_id to the subcommittee.
    utils.require_congress_legislators_repo()
    committees = {}
    for c in utils.yaml_load(
            "cache/congress-legislators/committees-current.yaml"):
        committees[c["thomas_id"]] = c
        if "house_committee_id" in c:
            committees[c["house_committee_id"] + "00"] = c
        c["subcommittees"] = dict(
            (s["thomas_id"], s) for s in c.get("subcommittees", []))

    for chamber in ("house", "senate"):
        # Load any existing meetings file so we can recycle GUIDs generated for Senate meetings.
        existing_meetings = []
        output_file = utils.data_dir(
        ) + "/committee_meetings_%s.json" % chamber
        if os.path.exists(output_file):
            existing_meetings = json.load(open(output_file))

        # Scrape for meeting info.
        if chamber == "senate":
            meetings = fetch_senate_committee_meetings(existing_meetings,
                                                       committees, options)
        else:
            meetings = fetch_house_committee_meetings(existing_meetings,
                                                      committees, options)

        # Write out.
        utils.write(
            json.dumps(meetings,
                       sort_keys=True,
                       indent=2,
                       default=utils.format_datetime), output_file)
Example #56
def lucene_result_parser():
    result_set = defaultdict(list)
    result_path = os.path.join(utils.RESULT_DIR, "lucene_regular")
    files = os.listdir(result_path)

    for file in files:
        qid, ext = os.path.basename(file).split(".")
        file_path = os.path.join(result_path, file)
        if ext == "txt":
            with open(file_path, "r") as fp:
                content = fp.read().split("\n")
                for row in content:
                    if len(row) > 0:
                        doc_path, score = row.split()
                        _, score = score.split("=")
                        docid = os.path.basename(doc_path).split(".")[0]
                        result_set[qid].append((docid, float(score)))

    result_path = os.path.join(utils.RESULT_DIR, "lucene",
                               "stem_False_stop_False_lucene_score.csv")
    result_path2 = os.path.join(utils.RESULT_DIR, "lucene",
                                "stem_False_stop_False_lucene_score.json")
    utils.write(None, result_path, result_set, csvf=True)
    utils.write(None, result_path2, result_set)
Example #57
    def _run(sim_name):
        local_only = [None] * len(n_sims)

        for n, i in enumerate(n_sims):
            print(i)
            d = info.copy()
            d['n_sim'] = i
            d['simulator'] = sim_name

            # get name for CPU/GPU
            # for backwards comparability, im adding cpu name into gpu col
            if sim_name == 'cl_amd_gpu':
                gpu_name = 'Radeon Vii'
            elif sim_name in ('cl_nvidia', 'cuda'):
                try:
                    from pycuda.driver import Device
                    gpu_name = ''.join([
                        i for i in Device(int(gpu_id)).name().split(' ')
                        if i != 'GeForce'
                    ])
                except:
                    gpu_name = 'RTX2080'
            else:
                gpu_name = get_info()['cpu_name']

            d['gpu_name'] = gpu_name
            d['total_time'], d['sim_time'] = run(i, model, tspan, sim_name)
            write(d)
            local_only[n] = d

        tmp_pd = pd.DataFrame(local_only)
        print(tmp_pd[['n_sim', 'sim_time']])

        out_name = os.path.join(
            cur_dir, 'Timings',
            '{}_{}_{}_timing.csv'.format(computer_name, sim_name, model.name))
Example #58
def moving_avgs(deq):
    """
	Takes in a deque of 500 data points
	Returns either Buy, Sell, or Hold
	"""

    dat = pd.DataFrame(list(deq))
    d = dat['weightedAverage'].astype(float)
    deque_length = len(d)
    lma = np.mean(d)
    sma_start = int(4 * deque_length / 5)
    sma = np.mean(d[sma_start:])
    date = datetime.datetime.fromtimestamp(int(
        dat.iloc[len(dat) - 1]['date'])).strftime('%Y-%m-%d %H:%M:%S')

    utils.write(date)
    utils.write(str(lma) + "," + str(sma))

    if (sma - lma) / lma > 0.01:
        return 1
    elif (sma - lma) / lma < -0.01:
        return -1
    else:
        return 0
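
A minimal, self-contained way to exercise moving_avgs() above, assuming each deque element is a candle dict carrying at least the 'weightedAverage' and 'date' fields the function reads (the values here are synthetic, and utils.write() is expected to be importable as in the original module):

import time
from collections import deque

# Synthetic candles: a flat series followed by a ~2% jump, so the short-term
# average ends above the long-term one and the expected signal is a buy (1).
candles = deque(maxlen=500)
now = int(time.time())
for i in range(500):
    price = 100.0 if i < 400 else 102.0
    candles.append({"weightedAverage": price, "date": now + i * 300})

print(moving_avgs(candles))  # expected: 1
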
Example #59
async def cog_load(ctx: Context, *, cog_names: str):
    loaded_cog_names = {name.lower() for name in bot.cogs.keys()}

    cog_names = cog_names.lower().split()

    try:
        to_load = set()
        for cog_name in cog_names:
            if cog_name not in loaded_cog_names:
                to_load.add(NAMES_COGS_MAP[cog_name])
            else:
                raise ValueError
    except KeyError:
        error = InvalidArguments(ctx=ctx, message=f"{cog_name} not found")
        await error.execute()
    except ValueError:
        error = InvalidArguments(ctx=ctx, message=f"{cog_name} already loaded")
        await error.execute()

    else:
        load_cogs(bot, to_load)
        write(COG_PATH, [name.lower() for name in bot.cogs.keys()])

        await ctx.send(f'Successfully loaded {", ".join(cog_names)}')
Example #60
    def menu_main(self):
        """Build the main menu."""
        self.game = None

        # Print game name
        title, title_pos = write('Burglar', 124, 'Multicolore.otf',
                                 colorScheme.MAINMENUTITLE)
        title_pos.centerx = self.background.get_rect().centerx
        title_pos.centery = self.background.get_rect().height / 3
        self.background.blit(title, title_pos)

        self.mm.assemble()
        self.mm.menu_pos.centery = 2 * (self.background.get_rect().height / 3)
        self.mm.menu_pos.centerx = self.background.get_rect().centerx
        self.background.blit(self.mm.menu, self.mm.menu_pos)