def run():
    options = utils.flags()
    debug = options.get('debug', False)

    filename = "legislators-current.yaml"
    args = utils.args()
    legislators = load_data(filename)

    if len(args) != 0:
        bioguides = args
        print("Fetching contact forms for %s..." % ', '.join(bioguides))
    else:
        bioguides = [member['id']['bioguide'] for member in legislators]
        print("Fetching contact forms for all current members...")

    for legislator in legislators:
        bioguide = legislator['id']['bioguide']
        if bioguide not in bioguides: continue
        if bioguide in SKIP_BIOGUIDES: continue

        if debug: print("Downloading form for %s" % bioguide, flush=True)

        try:
            steps = contact_steps_for(bioguide)
        except LegislatorNotFoundError as e:
            if debug: print("skipping, %s..." % e, flush=True)
            continue

        legislator['terms'][-1]['contact_form'] = steps['contact_form']['steps'][0]['visit']

    print("Saving data to %s..." % filename)
    save_data(legislators, filename)
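
The legislator scripts in this collection all call load_data and save_data from a shared utils module that is never shown. A minimal sketch of what those helpers could look like, assuming the files are YAML kept in a data/ directory and parsed with rtyaml (both assumptions; the real utils module may differ):

import os
import rtyaml

DATA_DIR = "data"  # hypothetical location of the YAML files

def load_data(filename):
    # Parse a YAML file and return the resulting Python structure.
    with open(os.path.join(DATA_DIR, filename)) as f:
        return rtyaml.load(f)

def save_data(data, filename):
    # Serialize the structure back to YAML; rtyaml keeps key order.
    with open(os.path.join(DATA_DIR, filename), "w") as f:
        rtyaml.dump(data, f)
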
Example #2
def main(args):
	'''Module main function'''
	global database
	global genetic_algorithm
	global joint_positions
	global goal_positions
	pygame.init()
	random.seed()
	database = utils.initialize_database(args, 'RobotTrainingData')
	database.set_objective_names(['Tiempo', r'Error en $\theta_1$', r'Error en $\theta_2$', r'Error en $\theta_3$', 'Energía'])
	problem = EV3Problem()
	generation = database.properties['highest_population']
	population_size = database.properties['population_size']
	genetic_algorithm = evolution.NSGA(problem, population_size)

	x_path = os.path.abspath(pkg_resources.resource_filename('resources.ev3', 'x_train.txt'))
	y_path = os.path.abspath(pkg_resources.resource_filename('resources.ev3', 'y_train.txt'))
	batch_start = (generation % 10) * N_GOALS
	joint_positions = np.loadtxt(x_path)[batch_start : batch_start + N_GOALS, :]
	goal_positions = np.loadtxt(y_path)[batch_start : batch_start + N_GOALS, :]

	if generation > 0:
		parents, children = utils.load_data(database)
		genetic_algorithm.set_population(parents)
		genetic_algorithm.set_children(children)
	for _ in range(args.iterations):
		generation += 1
		print('Starting generation ' + str(generation))
		genetic_algorithm.iterate()
		database.create_population()
		utils.save_data(genetic_algorithm, database)
		print('=' * (SCREEN_WIDTH - 1))
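
The batch_start arithmetic above cycles through ten consecutive blocks of N_GOALS rows from the training files, one block per stored generation, wrapping around every ten generations. A small worked example of the indexing, assuming N_GOALS = 5:

# generation  0 -> batch_start =  0 -> rows  0..4
# generation  3 -> batch_start = 15 -> rows 15..19
# generation 12 -> batch_start = 10 -> rows 10..14  (12 % 10 == 2, and 2 * 5 == 10)
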
  def resolvefb():
    updated_media = []
    for m in media:
      social = m['social']

      if 'facebook' in social and social['facebook']:
        graph_url = "https://graph.facebook.com/%s" % social['facebook']

        if re.match('\d+', social['facebook']):
          social['facebook_id'] = social['facebook']
          fbobj = requests.get(graph_url).json()
          if 'username' in fbobj:
            social['facebook'] = fbobj['username']

        else:
          try:
            social['facebook_id'] = requests.get(graph_url).json()['id']
          except:
            print "Unable to get graph ID for: %s" % social['facebook']
            social['facebook_id'] = None

      updated_media.append(m)

    print "Saving social media..."
    save_data(updated_media, "legislators-social-media.yaml")
Example #4
def run():
    # load in members, orient by bioguide ID
    print("Loading current legislators...")
    current = load_data("legislators-current.yaml")

    current_bioguide = { }
    for m in current:
      if "bioguide" in m["id"]:
        current_bioguide[m["id"]["bioguide"]] = m

    # remove out-of-office people from current committee membership
    print("Sweeping committee membership...")
    membership_current = load_data("committee-membership-current.yaml")
    for committee_id in list(membership_current.keys()):
      for member in membership_current[committee_id]:
        if member["bioguide"] not in current_bioguide:
          print("\t[%s] Ding ding ding! (%s)" % (member["bioguide"], member["name"]))
          membership_current[committee_id].remove(member)
    save_data(membership_current, "committee-membership-current.yaml")

    # remove out-of-office people from social media info
    print("Sweeping social media accounts...")
    socialmedia_current = load_data("legislators-social-media.yaml")
    for member in list(socialmedia_current):
      if member["id"]["bioguide"] not in current_bioguide:
        print("\t[%s] Ding ding ding! (%s)" % (member["id"]["bioguide"], member["social"]))
        socialmedia_current.remove(member)
    save_data(socialmedia_current, "legislators-social-media.yaml")
Example #5
def update_coordinates(matchings, filename, geocode_serv, all_=False):
    from datetime import datetime
    from utils.geocode import distance
    from utils import save_data
    
    schools_ = sorted(matchings.values(), key=lambda elem:datetime.strptime(elem['last_modified_at'], '%Y-%m-%d %H:%M:%S.%f'))
    counter1, counter2, counter3, counter4 = 0, 0, 0, 0
    for  school in schools_:
        counter1 += 1
        if ('address' in school
            and 'number' in school['address']
            and ('geocoded' not in school['address'] or school['address']['geocoded'] == False)
            and (all_ or not all_ and 'coordinates' not in school['address'])): 
            counter2 += 1
            address_str = full_address(school['address'])  # avoid shadowing the full_address() helper
            coord = geocode_serv(address_str)
            if coord:
                counter3 += 1
                if 'coordinates' not in school['address'] or distance(coord, school['address']['coordinates']) > 0.1: counter4 += 1
                school['address']['geocoded'] = True
                school['address']['coordinates'] = coord
            school['last_modified_at'] = str(datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f'))
            if (counter2 % 10 == 0): save_data(matchings, filename)
            str_counter = str(counter4) + '/' + str(counter3) + '/' + str(counter2) + '/' + str(counter1) + '/' + str(len(schools_))
            print(str_counter + ': ' + address_str + ' >> ' + str(coord))
Example #6
 def refresh_categories(self):
     from utils import save_data
     print ('Refreshing FS categories...')
     categories = self._api_venues.venues.categories()['categories']
     self.categories_by_id, self.categories_by_name = _prepare_categories(categories)
     save_data(categories, self._filename)
     print('Done.')
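
_prepare_categories is not shown here. Foursquare returns venue categories as a nested tree in which each category may carry its own 'categories' list, so a plausible sketch, assuming the helper only needs flat lookup tables, is:

def _prepare_categories(categories):
    # Flatten the nested category tree into id and name lookups (a sketch;
    # the real helper may keep extra fields or handle name collisions differently).
    by_id, by_name = {}, {}
    stack = list(categories)
    while stack:
        cat = stack.pop()
        by_id[cat['id']] = cat
        by_name[cat['name']] = cat
        stack.extend(cat.get('categories', []))
    return by_id, by_name
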
def main(state_num):
    matches_filename = 'matches_%d' % state_num
    print 'Loading %s ...' % matches_filename
    matches = utils.load_data(matches_filename)

    matches_reduced_filename = 'matches_reduced'
    try:
        print "Loading matches_reduced ..."
        matches_reduced = utils.load_data(matches_reduced_filename)
    except:
        print "Matches_reduced doesn't exists, creating new."
        matches_reduced = {}

    num_matches = len(matches.keys())

    for keyIdx, matchId in enumerate(matches.keys()):
        print "\rMatch %d out of %d [%0.1f%%]" % (keyIdx + 1, num_matches, (keyIdx + 1) / float(num_matches) * 100),

        summoners = []
        num_summoners = len(matches[matchId]['participants'])
        for i in range(num_summoners):
            champLevel = matches[matchId]['participants'][i]['stats']['champLevel']
            summonerId = matches[matchId]['participantIdentities'][i]['player']['summonerId']
            winner = matches[matchId]['participants'][i]['stats']['winner']
            summoners += [{'champLevel': champLevel, 'summonerId': summonerId, 'winner': winner}]
        matches_reduced[matchId] = {'summoners': summoners}

    print "Saving %s ..." % matches_reduced_filename
    utils.save_data(matches_reduced, matches_reduced_filename)
    print "Done!"
  def update():
    for rec in csv.DictReader(open("cache/social_media/%s_candidates.csv" % service)):
      bioguide = rec["bioguide"]
      candidate = rec["candidate"]

      if media_bioguide.has_key(bioguide):
        media_bioguide[bioguide]['social'][service] = candidate
      else:
        new_media = {'id': {}, 'social': {}}

        new_media['id']['bioguide'] = bioguide
        thomas_id = current_bioguide[bioguide]['id'].get("thomas", None)
        govtrack_id = current_bioguide[bioguide]['id'].get("govtrack", None)
        if thomas_id:
          new_media['id']['thomas'] = thomas_id
        if govtrack_id:
          new_media['id']['govtrack'] = govtrack_id


        new_media['social'][service] = candidate
        media.append(new_media)

    print "Saving social media..."
    save_data(media, "legislators-social-media.yaml")

    # if it's a youtube update, always do the resolve
    if service == "youtube":
      resolveyt()
  def resolvefb():
    updated_media = []
    for m in media:
      social = m['social']

      if ('facebook' in social and social['facebook']) and ('facebook_id' not in social):
        graph_url = "https://graph.facebook.com/%s" % social['facebook']

        if re.match('\d+', social['facebook']):
          social['facebook_id'] = social['facebook']
          print("Looking up graph username for %s" % social['facebook'])
          fbobj = requests.get(graph_url).json()
          if 'username' in fbobj:
            print("\tGot graph username of %s" % fbobj['username'])
            social['facebook'] = fbobj['username']
          else:
            print("\tUnable to get graph username")

        else:
          try:
            print("Looking up graph ID for %s" % social['facebook'])
            fbobj = requests.get(graph_url).json()
            if 'id' in fbobj:
              print("\tGot graph ID of %s" % fbobj['id'])
              social['facebook_id'] = fbobj['id']
            else:
              print("\tUnable to get graph ID")
          except:
            print("\tUnable to get graph ID for: %s" % social['facebook'])
            social['facebook_id'] = None

      updated_media.append(m)

    print("Saving social media...")
    save_data(updated_media, "legislators-social-media.yaml")
  def resolveig():
    # in order to preserve the comment block at the top of the file,
    # copy it over into a new RtYamlList instance. We do this because
    # Python list instances can't hold other random attributes.
    import rtyaml
    updated_media = rtyaml.RtYamlList()
    if hasattr(media, '__initial_comment_block'):
      updated_media.__initial_comment_block = getattr(media, '__initial_comment_block')

    client_id_file = open('cache/instagram_client_id','r')
    client_id = client_id_file.read()

    bioguide = utils.flags().get('bioguide', None)

    for m in media:
      if bioguide and (m['id']['bioguide'] != bioguide):
        updated_media.append(m)
        continue

      social = m['social']
      if 'instagram' not in social and 'instagram_id' not in social:
        updated_media.append(m)
        continue

      instagram_handle = social['instagram']
      query_url = "https://api.instagram.com/v1/users/search?q={query}&client_id={client_id}".format(query=instagram_handle,client_id=client_id)
      instagram_user_search = requests.get(query_url).json()
      for user in instagram_user_search['data']:
        time.sleep(0.5)
        if user['username'] == instagram_handle:
          m['social']['instagram_id'] = int(user['id'])
          print("matched instagram_id {instagram_id} to {instagram_handle}".format(instagram_id=social['instagram_id'],instagram_handle=instagram_handle))
      updated_media.append(m)

    save_data(updated_media, "legislators-social-media.yaml")
Example #11
	def put(self, name, content, type=None):
		if type is not None:
			name = '%s.%s' % (name, type)
		else:
			name = name
		path = '%s/%s' % ('/'.join(name[:3]), name)
		save_data(self.path + '/' + path, content)
		return path
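
A short usage note: the type suffix is appended before the path is built, so the first three characters of the final name pick the shard directories. For a hypothetical store whose path is '/var/blobs':

# store.put('abcdef', b'...', type='json')
#   name -> 'abcdef.json'
#   path -> 'a/b/c/abcdef.json'
#   save_data('/var/blobs/a/b/c/abcdef.json', b'...') is what actually runs
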
Example #12
def main():
    if not os.path.exists('../data/matched_points.pkl') or not os.path.exists('../data/links.pkl'):
        print "Saving Data"
        save_data()
    print "Loading Data"
    matched_points, links = load_data()
    print "Data Loaded"
    link_to_slopes = process(matched_points, links, 10)
    save_link_to_slopes(link_to_slopes, links)
Example #13
def measure_tips(out_fname='results/experiment_run'):
    """ Compute spiral-tip density for all available data files
    """
    data_dir = 'data'
    files = [os.path.join(data_dir, fn) for fn in os.listdir(data_dir)]

    with Pool(len(files)) as p:
        data = p.map(handle_measure_tips, files)

    save_data(out_fname, data)
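
handle_measure_tips is not shown; since Pool.map needs a module-level callable, it is presumably a thin per-file wrapper. A sketch, assuming it reuses the compute_spiral_tip_density function that appears later in this collection:

def handle_measure_tips(fname):
    # Hypothetical wrapper: measure one data file and remember which file it was.
    return fname, compute_spiral_tip_density(fname, plot=False)
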
def run(legislator_ids=None):
	legislators = utils.load_data('legislators-district-offices.yaml')
	try:
		for l in legislators:
			if legislator_ids and l['id']['bioguide'] not in legislator_ids:
				continue
			geocode_offices(l)
	finally:
		# Save in-progress geocodes in case of keyboard interrupt
		print("Saving data...")
		utils.save_data(legislators, 'legislators-district-offices.yaml')
Example #15
def update_enrichments():
    global enrichments
    enrichments = {}
    _add_instituicoes_basicas(enrichments)
    _add_instituicoes_superiores(enrichments)
    _add_attractions(enrichments)
    _add_museums(enrichments)
    _add_theaters(enrichments)
    _add_hospitals(enrichments)
    _add_hotels(enrichments)
    _add_offices(enrichments)
    save_data(enrichments, _filename)
Example #16
def run():
    # load in current members
    y = load_data("legislators-current.yaml")
    for m in y:
        # retrieve C-SPAN id, if available, from ProPublica API
        # TODO: use utils.download here
        response = urllib.request.urlopen("https://projects.propublica.org/represent/api/v1/members/%s.json" % m['id']['bioguide']).read()
        j = json.loads(response.decode("utf8"))
        cspan = j['results'][0]['cspan_id']
        if cspan:
            m['id']['cspan'] = int(cspan)
    save_data(y, "legislators-current.yaml")
Example #17
def run():
    # load in current members
    y = load_data("legislators-current.yaml")
    for m in y:
        # retrieve C-SPAN id, if available, from NYT API
        # TODO: use utils.download here
        response = urllib.request.urlopen("http://politics.nytimes.com/congress/svc/politics/v3/us/legislative/congress/members/%s.json" % m['id']['bioguide']).read()
        j = json.loads(response.decode("utf8"))
        cspan = j['results'][0]['cspan_id']
        if cspan:
            m['id']['cspan'] = int(cspan)
    save_data(y, "legislators-current.yaml")
Example #18
File: main.py Project: kpj/PyWave
def run_system(Generator):
    """ Apply `Generator` and integrate and cache system
    """
    system = Generator(config.grid_size).generate()
    print(system)

    cres = integrate_system(system)

    fname = gen_run_identifier()
    save_data("data/%s" % fname, np.array([cres, system.pacemakers, dict(config)]))

    return system, cres
  def clean():
    print "Loading historical legislators..."
    historical = load_data("legislators-historical.yaml")

    count = 0
    for m in historical:
      if media_bioguide.has_key(m["id"]["bioguide"]):
        media.remove(media_bioguide[m["id"]["bioguide"]])
        count += 1
    print "Removed %i out of office legislators from social media file..." % count

    print "Saving historical legislators..."
    save_data(media, "legislators-social-media.yaml")
def run():
  house_labels = "labels-113.csv"

  names = utils.flags().get('names', False)

  y = load_data("legislators-current.yaml")
  by_district = { }
  for m in y:
    last_term = m['terms'][-1]
    if last_term['type'] != 'sen':
      full_district = "%s%02d" % (last_term['state'], int(last_term['district']))
      by_district[full_district] = m


  for rec in csv.DictReader(open(house_labels)):
    full_district = rec['113 ST/DIS']

    # empty seat - IL-02
    if full_district not in by_district:
      if full_district == "IL02":
        continue
      else:
        raise "No!!"

    rec["MIDDLE"] = rec["MIDDLE"].decode("utf8").strip()
    rec["NICK"] = None
    m = re.match('^(.*) \u201c(.*)\u201d$', rec["MIDDLE"])
    if m:
      rec["MIDDLE"] = m.group(1)
      rec["NICK"] = m.group(2)

    by_district[full_district]['terms'][-1]['office'] = rec["ADDRESS"].strip()

    # only set name fields if we've been asked to (as a stopgap)
    if names:
      by_district[full_district]["name"]["first"] = rec["FIRST"].decode("utf8").strip()
      if rec["MIDDLE"]:
        by_district[full_district]["name"]["middle"] = rec["MIDDLE"]
      if rec["NICK"]:
        by_district[full_district]["name"]["nickname"] = rec["NICK"]
      by_district[full_district]["name"]["last"] = rec["LAST"].decode("utf8").strip()

    if rec["BIOGUIDE ID"] == "G000574":
      # The Clerk has the wrong ID for Alan Grayson!
      rec["BIOGUIDE ID"] = "G000556"

    by_district[full_district]["id"]["bioguide"] = rec["BIOGUIDE ID"]

    print("[%s] Saved" % full_district)

  save_data(y, "legislators-current.yaml")
Example #21
def _commit(confirmed_, unconfirmed_):
    from utils import load_data
    from utils import save_data
    
    auditing = load_data(_params['filenames'][5])
    confirmed = load_data(_params['filenames'][6])
    if not confirmed: confirmed = []
    unconfirmed = load_data(_params['filenames'][7])
    if not unconfirmed: unconfirmed = []
    
    i = 0
    while i < len(auditing):
        if  auditing[i]['matching'][0]['venue_id'] in confirmed_:
            auditing[i]['status'] = 'confirmed'
            a = auditing.pop(i)
            confirmed.append(a)
            i -= 1
        elif auditing[i]['matching'][0]['venue_id'] in unconfirmed_:
            auditing[i]['status'] = 'unconfirmed'
            a = auditing.pop(i)
            unconfirmed.append(a)
            i -= 1
        i += 1
    
    save_data([a[0] for a in auditing], _params['filenames'][5])
    save_data(auditing, _params['filenames'][5])
    save_data(confirmed, _params['filenames'][6])
    save_data(unconfirmed, _params['filenames'][7])
Example #22
def main():
    args = parse_arguments()
    log = prepare_logging(args.log_level)

    try:
        if args.scrap_nasdaq:
            from utils import scrap_nasdaq
            scrap_nasdaq()
            log.info("Data written to nasdaq.txt")
            sys.exit(0)

        from random import randint

        if args.restart:
            from subprocess import call
            call(["rm", "-rf", args.dbname])

        import sqlite3
        conn = sqlite3.connect(args.dbname)

        from Scrappers.Google import Google
        from utils import save_data, diff_to_last_record
        google = Google("http://www.google.com/finance/getprices?i=||INTERVAL||&p=||OFFSET||d&f=d,o,h,l,c,v&df=cpct&q=||SYMBOL||", 60)
        symbols = [symb.strip() for symb in open(args.input, "r").readlines()]
        for symbol in symbols:
            log.info("Processing {}".format(symbol))
            try:
                if args.restart:
                    diff_days = args.ticks
                else:
                    diff_days = diff_to_last_record(conn, symbol)
                    if diff_days > 10:
                        log.error("Interval is bigger than google supports! There will be a gap in the data!")
                        case = input("Should I proceed? Y/N")
                        if case != "Y":
                            raise RuntimeError("No data available that far back in history.")
                save_data(conn, symbol.lower(), google.scrap_symbol(symbol, diff_days))
            except RuntimeError:
                log.warning("No data available for {} from google".format(symbol))
                open(args.unresponsive_log, "a").write(symbol + "\n")
            finally:
                import time
                time.sleep(randint(1,args.sleep))

    except KeyboardInterrupt as ex:
        log.warning("Terminated by user.")
    except SystemExit as ex:
        log.info("Exiting")
Example #23
    def transfer_all_data(params):
        ph = load_data(os.path.join(params.save_data, "ph"))
        CIFAR10 = Load_CIFAR10(params)
        all_train_images, all_train_labels = CIFAR10.get_train_set(ratio=1)
        print(all_train_images.shape)
        batch_size = 5000
        for i in range(0, all_train_images.shape[0], batch_size):
            print(i, "-", i + batch_size)
            out_i = ph.transform(all_train_images[i:i + batch_size])
            for j in range(params.num_layers):
                save_data(
                    out_i[j],
                    os.path.join(params.save_data, 'out_{}_{}'.format(i, j)))

        del all_train_images
        save_data(all_train_labels,
                  os.path.join(params.save_data, "all_train_labels"))
        print("All transfered data saved")
def run_predictor(art,predictor,foldername,filename_prefix,testing_mode=False,skip_save=False,prepend_data_folder=True):
    # prepend_data_folder - Adds ~/src/mindpocket/data prefix to squaddir folder name
    from utils import save_data, load_data, exists_datafolder
    from utils import merge_artfiles

    verbose2_on=False

    # Loop through and add results field to data
    art2 = art.copy()
    for i,a in enumerate(art):
        filename = filename_prefix + '_art_' + str(i).zfill(3) + '.json'

        # Do a short test to see if file exists
        file_exists = exists_datafolder(filename,foldername,prepend_data_folder)
        if file_exists:
            print("File: " + filename + " already exists. Skipping...")
            continue        # If file already exists, skip over to the next file

        # Otherwise, proceed with the operation
        print("Article number:" + str(i).zfill(3) + ". Saving to: " + filename)
        for j,p in enumerate(a['paragraphs']):
            if verbose2_on:
                print("\tParagraph number: " + str(j))
            if not testing_mode:
                results = predictor.predict(sentence=p['context'])
                if verbose2_on:
                    for word, tag in zip(results["words"], results["tags"]):
                        print(f"{word}\t{tag}")

                # Merge words and tags together into 1 long sentence, for more efficient json storage
                results2 = {
                        'words': ' '.join(results['words']),
                        'tags': ' '.join(results['tags']),
                    }
            else:
                results = 'asdf'
                results2 = 'asdf'
            art2[i]['paragraphs'][j]['allenNER']=results2

        # Save individual articles
        if not skip_save: save_data(art2[i],filename,foldername,[],[],prepend_data_folder)

    # Once all individual files have been saved, merge into 1 large json file
    merge_artfiles(filename_prefix + '_art_*',foldername,filename_prefix + '.json',verbose=True,do_overwrite=[],prepend_data_folder=prepend_data_folder)
    def __init__(self,
                 fold,
                 detection_results_dir='../output/detection_results',
                 classification_results_dir='../output/classification_results',
                 train_on_all_dataset=True,
                 load_only_video_ids=None,
                 is_test=False):

        self.video_clips = dataset.video_clips(is_test=is_test)

        if load_only_video_ids is not None:
            all_video_ids = set(load_only_video_ids)
        else:
            all_video_ids = set(self.video_clips.keys())

        if train_on_all_dataset:
            self.test_video_ids = []
            self.train_video_ids = all_video_ids
        else:
            self.test_video_ids = set(dataset.fold_test_video_ids(fold))
            self.train_video_ids = all_video_ids.difference(
                self.test_video_ids)

        self.gt = pd.read_csv(
            '../input/N1_fish_N2_fish_-_Training_set_annotations.csv')
        self.gt.dropna(axis=0, inplace=True)
        self.gt['have_frame'] = 1.0

        self.video_frames_count = {}
        self.video_data = {}
        self.video_data_gt = {}

        print('load video data...')
        cache_fn = '../output/sequence_rnn_test.pkl' if is_test else '../output/sequence_rnn_train.pkl'
        try:
            self.video_frames_count, self.video_data, self.video_data_gt, self.columns = utils.load_data(
                cache_fn)
        except FileNotFoundError:
            self.video_frames_count, self.video_data, self.video_data_gt, self.columns = self.load(
                all_video_ids, detection_results_dir,
                classification_results_dir)
            utils.save_data((self.video_frames_count, self.video_data,
                             self.video_data_gt, self.columns), cache_fn)
        print('loaded')
Example #26
def run():

    # pick either current or historical
    # order is important here, since current defaults to true
    if utils.flags().get('historical', False):
        filename = "legislators-historical.yaml"
    elif utils.flags().get('current', True):
        filename = "legislators-current.yaml"
    else:
        print("No legislators selected.")
        exit(0)

    print("Loading %s..." % filename)
    legislators = load_data(filename)

    # reoriented cache to access by bioguide ID
    by_bioguide = {}
    for m in legislators:
        if "bioguide" in m["id"]:
            by_bioguide[m["id"]["bioguide"]] = m

    count = 0

    for id in range(8245, 21131):
        print(id)
        url = "http://history.house.gov/People/Detail/%s" % id
        r = requests.get(url, allow_redirects=False)
        if r.status_code == 200:
            dom = lxml.html.parse(io.StringIO(r.text)).getroot()
            try:
                bioguide_link = dom.cssselect("a.view-in-bioguide")[0].get(
                    'href')
                bioguide_id = bioguide_link.split('=')[1]
                by_bioguide[bioguide_id]["id"]["house_history"] = id
                count = count + 1
            except:
                continue
        else:
            continue

    print("Saving data to %s..." % filename)
    save_data(legislators, filename)

    print("Saved %d legislators to %s" % (count, filename))
def run():

  # pick either current or historical
  # order is important here, since current defaults to true
  if utils.flags().get('historical', False):
    filename = "legislators-historical.yaml"
  elif utils.flags().get('current', True):
    filename = "legislators-current.yaml"
  else:
    print("No legislators selected.")
    exit(0)

  print("Loading %s..." % filename)
  legislators = load_data(filename)

  # reoriented cache to access by bioguide ID
  by_bioguide = { }
  for m in legislators:
    if "bioguide" in m["id"]:
      by_bioguide[m["id"]["bioguide"]] = m

  count = 0

  for id in range(8245,21131):
    print(id)
    url = "http://history.house.gov/People/Detail/%s" % id
    r = requests.get(url, allow_redirects=False)
    if r.status_code == 200:
        dom = lxml.html.parse(io.StringIO(r.text)).getroot()
        try:
            bioguide_link = dom.cssselect("a.view-in-bioguide")[0].get('href')
            bioguide_id = bioguide_link.split('=')[1]
            by_bioguide[bioguide_id]["id"]["house_history"] = id
            count = count + 1
        except:
            continue
    else:
        continue

  print("Saving data to %s..." % filename)
  save_data(legislators, filename)

  print("Saved %d legislators to %s" % (count, filename))
Example #28
 def test(self, sess):
     
     self.saver.restore(sess, os.path.join(self.checkpoint_dir, "./Colorization"))
     data = glob('./test/*.jpg')
     img = data[0:self.batch_size]
     sample = np.array([load_data(sample_file, n='gray') for sample_file in img]).astype(np.float32)
     sample = sess.run(self.sample_images, feed_dict={self.sample: sample})
     sample = save_data(sample)
     for i in range(self.batch_size):
         cv2.imwrite('./result/{}.jpg'.format(i), sample[i])        
Example #29
    def save(self):

        self.save_history()
        self.save_plasmid_description()
        utils.save_data(self.data,
                        self.CONFIG)  #TODO save for each repetition !
        self.save_config()

        # WILL BE DELETED
        # <<<<<<< HEAD
        #         path = PARAMS["w_path_1"]

        #         self.genes = self.genes.transpose()
        #         names = self.genes.index
        #         self.genes.reset_index(drop=True)
        #         self.genes.insert(0,"id",names.values)
        #         self.genes.to_csv(path_or_buf = path+"plasmid_description.csv", index=False, sep=',')

        return
Example #30
async def login():
    user_id, login_token = utils.get_user_id(), utils.get_login_token()
    just_logged_in = False
    if not user_id or not login_token:
        #webbrowser.open(SERVER_HOST + '/local-products-login?port='+str(LOCAL_SERVER_PORT), new=0, autoraise=True)

        await utils.show_info(
            "Sincronizador de archivos",
            "No hay ningún usuario guardado. Inicia sesión...")
        user_mail, password = await authenticate_box.ask_login()
        if user_mail == None or password == None:
            exit()
        #user_mail= (await aioconsole.ainput("Correo electrónico: ")).strip()
        #password= (await aioconsole.ainput("Contraseña: ")).strip()
        try:
            user_id, login_token = await server.login(mail=user_mail,
                                                      password=password)
        except RemoteException as e:
            await utils.show_warning("Linarand sincronizador de archivos",
                                     "Hubo un problema. " + str(e))
            return await login()
        utils.set_user_id(user_id)
        utils.set_login_token(login_token)
        utils.save_data()
        just_logged_in = True

    try:
        username = await server.authenticate(user_id=user_id,
                                             token=login_token)
    except RemoteException as e:
        await utils.show_warning(
            "Sincronizador de archivos",
            "Hubo un problema. " + str(e) + ". Eliminando usuario")
        utils.set_user_id(None)
        utils.set_login_token(None)
        utils.save_data()
        return await login()
    if just_logged_in:
        asyncio.ensure_future(
            utils.show_info(
                "Sincronizador de archivos",
                "Sesión iniciada como %s. Puedes ir a la página de Ingeniería Linarand y sincronizar los archivos que desees desde este equipo."
                % username))
Example #31
def main():
    args = parse_args()
    train = True
    data_train = load_data(True, False)
    data = data_train
    if args.fin:
        train = False
        data = load_data(train, False)

    #data_train['common'] = data_train.apply(lambda row: list((Counter(row.question1_tk) & Counter(row.question2_tk)).elements()))
    #data_train['diff'] = data_train.apply(lambda row: list(((Counter(row.question1_tk) | Counter(row.question2_tk) - (Counter(row.question1_tk) & Counter(row.question2_tk)).elements()))#

    for vect_type in ["cv", "cv_t", "tf", "tf_t", "tfidf", "tfidf_t"]:
        for i in range(3):
            ngrams = i + 1
            cv = create_cv(data_train, ngrams, vect_type)
            add_features(data, cv, ngrams, vect_type)

    save_data(data, train)
Example #32
def run_blending():
    print('data manipulation')
    train_comment = train_data['comment_text'].apply(clean_word)
    test_comment = test_data['comment_text'].apply(clean_word)

    print('split data')
    x_train, x_valid, y_train, y_valid = blending_data_split(
        train_comment, train_data[toxic_types],
        params.blending.data_split.test_size,
        params.blending.data_split.ramdom_state)
    print('create features')
    train_tfidf, valid_tfidf, test_tfidf = tfidf(
        x_train, test_comment, x_valid, params.blending.tfidf.max_word_ngram,
        params.blending.tfidf.max_char_ngram, params.blending.tfidf.stack)
    print('run blending')
    preds = blending(train_tfidf, valid_tfidf, y_train, y_valid, test_tfidf)

    print('save data')
    save_data(file_name='bleding_1', preds=preds, toxic_types=toxic_types)
Example #33
def compute_spiral_tip_density(fname, plot=True):
    """ Compute spiral tip density of given example
    """
    cache_dir = 'cache'
    pure_fname = os.path.splitext(os.path.basename(fname))[0]

    if not os.path.isdir(os.path.join(cache_dir, pure_fname)):
        # preprocess input
        camp, pacemaker, used_config = np.load(fname)
        camp = np.rollaxis(camp, 0, 3)
        camp = preprocess_data(camp)

        # compute data
        rolled_camp = np.rollaxis(camp, 2, 0)
        lphase = compute_local_phase_field(camp) # decreases time dim due to tau
        grads = compute_discrete_gradient(lphase)
        singularities = compute_singularity_measure(grads)

        # cache data
        # make sure the cache directory exists before writing into it
        os.makedirs(os.path.join(cache_dir, pure_fname), exist_ok=True)
        save_data(os.path.join(cache_dir, pure_fname, 'rolled_camp'), rolled_camp)
        save_data(os.path.join(cache_dir, pure_fname, 'lphase'), lphase)
        save_data(os.path.join(cache_dir, pure_fname, 'grads'), grads)
        save_data(os.path.join(cache_dir, pure_fname, 'singularities'), singularities)
    else:
        print(' > Using cached data')
        rolled_camp = np.load(os.path.join(cache_dir, pure_fname, 'rolled_camp.npy'))
        lphase = np.load(os.path.join(cache_dir, pure_fname, 'lphase.npy'))
        grads = np.load(os.path.join(cache_dir, pure_fname, 'grads.npy'))
        singularities = np.load(os.path.join(cache_dir, pure_fname, 'singularities.npy'))

    # compute singularity measures
    avg_singularity = np.mean(singularities, axis=0)

    thres_singularity = avg_singularity.copy()
    thres_singularity[thres_singularity > np.pi] = 2 * np.pi
    thres_singularity[thres_singularity < -np.pi] = -2 * np.pi
    thres_singularity[(thres_singularity > -np.pi) & (thres_singularity < np.pi)] = 0

    # plot data if needed
    if plot:
        singularity_plot(
            pure_fname,
            rolled_camp, lphase, grads,
            singularities, avg_singularity, thres_singularity
        )

    # compute tip density
    tip_num = np.count_nonzero(thres_singularity)
    tip_density = float(tip_num) / thres_singularity.size

    return tip_density
  def resolvefb():
    # in order to preserve the comment block at the top of the file,
    # copy it over into a new RtYamlList instance. We do this because
    # Python list instances can't hold other random attributes.
    import rtyaml
    updated_media = rtyaml.RtYamlList()
    if hasattr(media, '__initial_comment_block'):
      updated_media.__initial_comment_block = getattr(media, '__initial_comment_block')

    for m in media:
      social = m['social']

      if ('facebook' in social and social['facebook']) and ('facebook_id' not in social):
        graph_url = "https://graph.facebook.com/%s" % social['facebook']

        if re.match('\d+', social['facebook']):
          social['facebook_id'] = social['facebook']
          print("Looking up graph username for %s" % social['facebook'])
          fbobj = requests.get(graph_url).json()
          if 'username' in fbobj:
            print("\tGot graph username of %s" % fbobj['username'])
            social['facebook'] = fbobj['username']
          else:
            print("\tUnable to get graph username")

        else:
          try:
            print("Looking up graph ID for %s" % social['facebook'])
            fbobj = requests.get(graph_url).json()
            if 'id' in fbobj:
              print("\tGot graph ID of %s" % fbobj['id'])
              social['facebook_id'] = fbobj['id']
            else:
              print("\tUnable to get graph ID")
          except:
            print("\tUnable to get graph ID for: %s" % social['facebook'])
            social['facebook_id'] = None

      updated_media.append(m)

    print("Saving social media...")
    save_data(updated_media, "legislators-social-media.yaml")
Example #35
  def resolvefb():
    # in order to preserve the comment block at the top of the file,
    # copy it over into a new RtYamlList instance. We do this because
    # Python list instances can't hold other random attributes.
    import rtyaml
    updated_media = rtyaml.RtYamlList()
    if hasattr(media, '__initial_comment_block'):
      updated_media.__initial_comment_block = getattr(media, '__initial_comment_block')

    for m in media:
      social = m['social']

      if ('facebook' in social and social['facebook']) and ('facebook_id' not in social):
        graph_url = "https://graph.facebook.com/%s" % social['facebook']

        if re.match('\d+', social['facebook']):
          social['facebook_id'] = social['facebook']
          print("Looking up graph username for %s" % social['facebook'])
          fbobj = requests.get(graph_url).json()
          if 'username' in fbobj:
            print("\tGot graph username of %s" % fbobj['username'])
            social['facebook'] = fbobj['username']
          else:
            print("\tUnable to get graph username")

        else:
          try:
            print("Looking up graph ID for %s" % social['facebook'])
            fbobj = requests.get(graph_url).json()
            if 'id' in fbobj:
              print("\tGot graph ID of %s" % fbobj['id'])
              social['facebook_id'] = fbobj['id']
            else:
              print("\tUnable to get graph ID")
          except:
            print("\tUnable to get graph ID for: %s" % social['facebook'])
            social['facebook_id'] = None

      updated_media.append(m)

    print("Saving social media...")
    save_data(updated_media, "legislators-social-media.yaml")
Example #36
def run():
    # load legislators YAML files
    yamlfiles = {}
    for fn in ('historical', 'current'):
        fn = 'legislators-%s.yaml' % fn
        print("Loading %s..." % fn)
        yamlfiles[fn] = load_data(fn)

    # reoriented cache to access by bioguide ID
    by_bioguide = {}
    known_house_history_ids = set()
    for legislators in yamlfiles.values():
        for m in legislators:
            if "bioguide" in m["id"]:
                by_bioguide[m["id"]["bioguide"]] = m
            if "house_history" in m["id"]:
                known_house_history_ids.add(m["id"]["house_history"])
    count = 0

    # scrape history.house.gov
    if len(sys.argv) == 1:
        id_range = range(22000, 25000)
    else:
        id_range = [int(arg) for arg in sys.argv[1:]]
    for id in id_range:
        # skip known IDs
        if id in known_house_history_ids:
            continue
        print(id)
        bioguide_id = get_bioguide_for_house_history_id(id)
        if bioguide_id and bioguide_id in by_bioguide:
            print(id, bioguide_id)
            by_bioguide[bioguide_id]["id"]["house_history"] = id
            count = count + 1

    # write YAML files to disk
    for filename, legislators in yamlfiles.items():
        print("Saving data to %s..." % filename)
        save_data(legislators, filename)

    # how many updates did we make?
    print("Saved %d legislators" % count)
def run():
  # load legislators YAML files
  yamlfiles = { }
  for fn in ('historical', 'current'):
    fn = 'legislators-%s.yaml' % fn
    print("Loading %s..." % fn)
    yamlfiles[fn] = load_data(fn)

  # reoriented cache to access by bioguide ID
  by_bioguide = { }
  known_house_history_ids = set()
  for legislators in yamlfiles.values():
    for m in legislators:
      if "bioguide" in m["id"]:
        by_bioguide[m["id"]["bioguide"]] = m
      if "house_history" in m["id"]:
        known_house_history_ids.add(m["id"]["house_history"])
  count = 0

  # scrape history.house.gov
  if len(sys.argv) == 1:
    id_range = range(22000, 25000)
  else:
    id_range = [int(arg) for arg in sys.argv[1:]]
  for id in id_range:
    # skip known IDs
    if id in known_house_history_ids:
      continue
    print(id)
    bioguide_id = get_bioguide_for_house_history_id(id)
    if bioguide_id and bioguide_id in by_bioguide:
      print(id, bioguide_id)
      by_bioguide[bioguide_id]["id"]["house_history"] = id
      count = count + 1

  # write YAML files to disk
  for filename, legislators in yamlfiles.items():
    print("Saving data to %s..." % filename)
    save_data(legislators, filename)

  # how many updates did we make?
  print("Saved %d legislators" % count)
Example #38
def run_CNN_subwordlevel():
    print('data manipulation')
    train_comment = train_data['comment_text'].apply(clean_subword)
    test_comment = test_data['comment_text'].apply(clean_subword)
    n_vocab = vocab_size(train_comment)  # avoid shadowing the vocab_size() helper

    print('pad sequence')
    x_train_pad, x_test_pad = pad_sequence(
        train_comment,
        test_comment,
        n_vocab,
        max_length=params.CNN_wordlevel.max_length)
    print('run CNN word level')
    preds = CNN_subwordlevel(x_train_pad, train_data[toxic_types], x_test_pad,
                             n_vocab)

    print('save data')
    save_data(file_name='CNN_subwordlevel_1',
              preds=preds,
              toxic_types=toxic_types)
  def update():
    for rec in csv.DictReader(open("cache/social_media/%s_candidates.csv" % service)):
      bioguide = rec["bioguide"]
      candidate = rec["candidate"]

      if media_bioguide.has_key(bioguide):
        media_bioguide[bioguide]['social'][service] = candidate
      else:
        new_media = {'id': {}, 'social': {}}

        new_media['id']['bioguide'] = bioguide
        thomas_id = current_bioguide[bioguide]['id'].get("thomas", None)
        if thomas_id:
          new_media['id']['thomas'] = thomas_id

        new_media['social'][service] = candidate
        media.append(new_media)

    print "Saving social media..."
    save_data(media, "legislators-social-media.yaml")
Example #40
def _select_for_auditing():
    from utils import load_data
    from utils import save_data
    from utils.geocode import distance
    
    matchings = load_data(_params['filenames'][0])
    auditing = []
    
    i = 0
    while i >= 0 and i < len(matchings):
        loc1 = matchings[i][0]['address']['coordinates'] if 'coordinates' in matchings[i][0]['address'] else None
        loc2 = matchings[i][1]['address']['coordinates'] if 'coordinates' in matchings[i][1]['address'] else None
        if distance(loc1, loc2) >= 0.6:
            auditing.append({'status':'unconfirmed', 'matching':matchings[i]})
            matchings.pop(i)
            i -= 1
        i += 1
    
    save_data([a['matching'][0] for a in auditing], _params['filenames'][5])
    save_data(matchings, _params['filenames'][0])
Example #41
    def convert(self, to, amount, date=None):
        logging.debug(f"Convert {locals()}")
        to = to if isinstance(to, str) else to.name

        if to == self.name:
            return amount

        price_data, requested_price, date = self.get_cached(to,
                                                            date,
                                                            attr="close")

        if requested_price is None:
            requested_price_kraken = get_pair_from_kraken(self.name,
                                                          to,
                                                          client,
                                                          date=date)
            if (date == None) or (date.normalize()
                                  == pd.to_datetime("now").normalize()):
                save_file = f"{self.name}_{to}_latest.pkl"
                requested_price = float(requested_price_kraken)
                save_data(requested_price, save_file)
            else:
                save_file = f"{self.name}_{to}.pkl"
                # Get the exchange rate for the closest date to the requested date
                requested_price = requested_price_kraken.index.get_loc(
                    date, method='nearest')
                requested_price = requested_price_kraken["close"].iloc[
                    requested_price]
                if not isinstance(requested_price, Number):
                    requested_price = requested_price.iloc[0]

                requested_price_kraken = requested_price_kraken[["close"]]
                logging.debug(
                    f"Price data queried {price_data}: {type(price_data)}")

                save_data(requested_price_kraken, save_file)

        logging.debug(f"Price data {price_data}: {type(price_data)}")
        logging.debug(f"Price data {requested_price}: {type(requested_price)}")

        return requested_price * amount
Example #42
    def convert(self, to, amount, date=None):
        logging.debug(f"Convert {locals()}")
        to = to if isinstance(to, str) else to.name

        if to == self.name:
            return amount

        price_data, requested_price, date = self.get_cached(to, date)

        if requested_price is None:
            if (date == None) or (date.normalize()
                                  == pd.to_datetime("now").normalize()):
                save_file = f"{self.name}_{to}_latest.pkl"

                price_url = url_join(URL_MARKET_PRICE_FIAT, "latest")
                requested_price = requests.get(price_url,
                                               params={
                                                   "base": self.name
                                               }).json()["rates"][to]
                requested_price = float(requested_price)
                save_data(requested_price, save_file, add_path_prefix=True)
            else:
                save_file = f"{self.name}_{to}.pkl"
                date_query = date.strftime("%Y-%m-%d")
                price_url = url_join(URL_MARKET_PRICE_FIAT, date_query)
                requested_price = requests.get(price_url,
                                               params={"base": self.name}
                                               ).json()["rates"][to]
                requested_price = float(requested_price)
                new_value = pd.Series(name=date,
                                      data={
                                          "price": requested_price
                                      }).to_frame()

                if price_data is None:
                    price_data = new_value
                else:
                    price_data = price_data.append(new_value)
                save_data(price_data, save_file, add_path_prefix=True)

        return requested_price * amount
Example #43
def _remove_from_matchings(remove):
    from utils import load_data
    from utils import save_data
    
    matchings = load_data(_params['filenames'][0])
    i = 0
    while i < len(matchings):
        if matchings[i][0]['venue_id'] in remove:
            matchings.pop(i)
            i -= 1
        i += 1
    save_data(matchings, _params['filenames'][0])
def transform(sample_file, dict_file, projection_file, output_file):
    n_samples = utils.count_lines(sample_file)
    n_features = utils.count_lines(dict_file)
    batch_size = 100000

    print 'loading projector...'
    with open(projection_file, 'rb') as f:
        p = utils.zloads(f.read())
        # p = pickle.load(f)

    with open(output_file, 'w') as f:
        for (v, idx, ptr) in utils._load_data_batch(sample_file, batch_size, np.intc, n_samples):
            X = csr_matrix((v, idx, ptr), shape=(len(ptr)-1, n_features))
            print 'transforming... \r',
            sys.stdout.flush()
            T = p.transform(X)

            print 'saving...       \r',
            sys.stdout.flush()
            utils.save_data(f, T)
        print '\nTransform completed.'
Example #45
def review_save(unreviewed, approved, flagged):
    if save_locked():
        return
    else:
        # create lockfile
        f = open(LOCKFILE, 'w')
        f.close()

        # reorganize data
        unreviewed_s = review_prep_data_pre_save(unreviewed)
        approved_s = review_prep_data_pre_save(approved)
        flagged_s = review_prep_data_pre_save(flagged)

        save_data(unreviewed_s, 'legislators-district-offices-unreviewed.yaml')
        save_offices(unreviewed_s,
                     unreviewed=True,
                     flagged=False,
                     approved=False,
                     gpo=False)
        save_data(approved_s, 'legislators-district-offices-approved.yaml')
        save_offices(approved_s,
                     unreviewed=False,
                     flagged=False,
                     approved=True,
                     gpo=False)
        save_data(flagged_s, 'legislators-district-offices-flagged.yaml')
        save_offices(flagged_s,
                     unreviewed=False,
                     flagged=True,
                     approved=False,
                     gpo=False)

        # remove lock
        os.unlink(LOCKFILE)
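
save_locked is referenced but not shown; given the lockfile handling above, a minimal sketch (an assumption) is simply:

def save_locked():
    # True while another save holds the lockfile.
    return os.path.exists(LOCKFILE)
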
    def resolveig():
        # in order to preserve the comment block at the top of the file,
        # copy it over into a new RtYamlList instance. We do this because
        # Python list instances can't hold other random attributes.
        import rtyaml
        updated_media = rtyaml.RtYamlList()
        if hasattr(media, '__initial_comment_block'):
            updated_media.__initial_comment_block = getattr(
                media, '__initial_comment_block')

        client_id_file = open('cache/instagram_client_id', 'r')
        client_id = client_id_file.read()

        bioguide = utils.flags().get('bioguide', None)

        for m in media:
            if bioguide and (m['id']['bioguide'] != bioguide):
                updated_media.append(m)
                continue

            social = m['social']
            if 'instagram' not in social and 'instagram_id' not in social:
                updated_media.append(m)
                continue

            instagram_handle = social['instagram']
            query_url = "https://api.instagram.com/v1/users/search?q={query}&client_id={client_id}".format(
                query=instagram_handle, client_id=client_id)
            instagram_user_search = requests.get(query_url).json()
            for user in instagram_user_search['data']:
                time.sleep(0.5)
                if user['username'] == instagram_handle:
                    m['social']['instagram_id'] = int(user['id'])
                    print(
                        "matched instagram_id {instagram_id} to {instagram_handle}"
                        .format(instagram_id=social['instagram_id'],
                                instagram_handle=instagram_handle))
            updated_media.append(m)

        save_data(updated_media, "legislators-social-media.yaml")
Example #47
    def save_test_set(self):
        """
        Saves a JSON file with useful information for the test phase:
            - training size
            - test images IDs
            - attributes
            - batch size
        """

        try:
            test_data = {
                'train_size': self.train_size,
                'test_img_ids': self.test_img_ids,
                'attributes': self.attributes,
                'batch_size': self.batch_size
            }

            file_path = "./test_data"
            save_data(file_path, test_data)
        except:
            raise
        print("Test img_ids successfully saved.")
Example #48
def execute_experiment(name, variations, X, y, parameters, kfold):
    """Train neural network for a set of different parameters and save results
    
    Arguments:
        name -- Name of the parameter to be varied.
        variations -- Variations of the parameter.
        X -- Dataset input instances.
        y -- Dataset output classes.
        parameters --  Neural network model parameters.
        kfold -- Object used to create k-folds for cross-validation.
    """

    parameters = deepcopy(parameters)
    accuracy = {}

    for variation in variations:
        parameters[name] = variation
        accuracy_train = []  # Cross-validation train accuracy
        accuracy_test = []  # Cross-validation test accuracy

        for train_index, test_index in kfold.split(X, y):
            if name == "extra_size":
                results = train_network(X, y, train_index, test_index,
                                        parameters, True)
            else:
                results = train_network(X, y, train_index, test_index,
                                        parameters)

            accuracy_train.append(results["acc"])
            accuracy_test.append(results["val_acc"])

        accuracy[variation] = {
            "train_mean": np.mean(accuracy_train, axis=0),
            "train_std": np.std(accuracy_train, axis=0),
            "test_mean": np.mean(accuracy_test, axis=0),
            "test_std": np.std(accuracy_test, axis=0)
        }

    utils.save_data(name, accuracy)
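
A hypothetical call, assuming X, y and a parameters dict are already loaded and using scikit-learn's StratifiedKFold for the kfold argument:

from sklearn.model_selection import StratifiedKFold

kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
execute_experiment("learning_rate", [0.001, 0.01, 0.1], X, y, parameters, kfold)
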
Example #49
def remove_dc(offices):
    output = {}
    removal_count = 0
    for bioguide in offices:
        for office in offices[bioguide]:
            if not office.get('state', '').upper().replace(
                    '.', '').strip() in ('DC', 'DISTRICT OF COLUMBIA'):
                if not output.has_key(bioguide):
                    output[bioguide] = []
                output[bioguide].append(office)
            else:
                removal_count = removal_count + 1

    print "Removed %d D.C. offices." % removal_count
    print "Saving..."
    save_data(output, "legislators-district-offices-unreviewed.yaml")
    save_offices(output,
                 unreviewed=True,
                 flagged=False,
                 approved=False,
                 gpo=False)
    return output
Example #50
def info_main(account, info_url, track_id):
    """
    简历详情页主方法
    :return: True : 正常入库...  , False : 出现异常
    """
    # info_url = "http://ehire.51job.com/Candidate/ResumeViewFolder.aspx?hidSeqID=9216079958&hidFolder=EMP"
    refer_url = "http://ehire.51job.com/Inbox/InboxRecentEngine.aspx?Style=1"
    info_html = conn_html(account,
                          info_url,
                          4,
                          refer_url=refer_url,
                          track_id=track_id)
    if 'login' == info_html:
        logger.error("出现登录页面 %s" % account['userName'])
        return 'login'

    if info_html:
        logger.info('获取一条收件箱简历成功!')
        raw = parse_info_html(account, info_html, track_id)
        sql = 'INSERT INTO resume_raw (' \
              'trackId,source,content,createTime,createBy,email,resumeUpdateTime,resumeSubmitTime' \
              ') values(%s, "FIVE_ONE",%s, now(), "python", %s, %s, %s)'
        value = (raw.trackId, raw.content, raw.email, raw.resumeUpdateTime,
                 raw.resumeSubmitTime)
        kafka_data = {
            'trackId': raw.trackId,
            'source': raw.source,
            "channelType": 'WEB',
            'resourceType': 'RESUME_INBOX',
            'resourceDataType': 'RAW',
            'content': raw.to_dict(),
            'protocolType': 'HTTP'
        }
        logger.info('开始保存一份收件箱简历')
        common_utils.save_data(sql, value, kafka_data)
        return True
    else:
        logger.info('获取一条收件箱简历失败!')
        return False
def all_data_to_graphs():
    # generates a new structure mirroring the mnist_data directory, with every image converted to a .dat file (a graph) (TOO HEAVY)
    input_path = './mnist_data/'
    output_path = './mnist_graphs/'
    for dirpath, dirnames, filenames in os.walk(input_path):
        structure = os.path.join(output_path, dirpath[len(input_path):])
        if not os.path.isdir(structure):
            os.mkdir(structure)
        else:
            print("Folder does already exits!")

    for root, dirnames, filenames in os.walk(input_path):
        for file_complete in filenames:
            filename = os.path.join(root, file_complete)
            with open(filename, "r") as f:
                digit = os.path.basename(os.path.dirname(filename))
                graph = mnist_on_plane.to_graph(filename, digit)
                parts = pathlib.Path(filename).parts[1:-1]
                file, file_extension = os.path.splitext(file_complete)
                new_filename = output_path + str(
                    pathlib.Path(*parts)) + '/' + file + '.dat'
                save_data(graph, new_filename)
    def resolvefb():
        updated_media = []
        for m in media:
            social = m['social']

            if ('facebook' in social
                    and social['facebook']) and ('facebook_id' not in social):
                graph_url = "https://graph.facebook.com/%s" % social['facebook']

                if re.match('\d+', social['facebook']):
                    social['facebook_id'] = social['facebook']
                    print "Looking up graph username for %s" % social[
                        'facebook']
                    fbobj = requests.get(graph_url).json()
                    if 'username' in fbobj:
                        print "\tGot graph username of %s" % fbobj['username']
                        social['facebook'] = fbobj['username']
                    else:
                        print "\tUnable to get graph username"

                else:
                    try:
                        print "Looking up graph ID for %s" % social['facebook']
                        fbobj = requests.get(graph_url).json()
                        if 'id' in fbobj:
                            print "\tGot graph ID of %s" % fbobj['id']
                            social['facebook_id'] = fbobj['id']
                        else:
                            print "\tUnable to get graph ID"
                    except:
                        print "\tUnable to get graph ID for: %s" % social[
                            'facebook']
                        social['facebook_id'] = None

            updated_media.append(m)

        print "Saving social media..."
        save_data(updated_media, "legislators-social-media.yaml")
Example #53
0
def change_proxy():
    # proxies must be formatted: 182.52.238.111:30098,103.105.77.22:8181,
    # bad proxies will be updated and skipped next time
    proxies: deque = get_global('PROXIES')
    bad_proxies: str = load_data('bad_proxies.txt')
    while proxies:
        proxy: str = proxies.popleft()
        if proxy in bad_proxies:
            continue
        #log(f'checking proxy {proxy}')
        try:
            old_proxy: str = get_global('PROXY')
            save_data('bad_proxies.txt', old_proxy, end=',')
            set_global('PROXY', proxy)
            set_global('PROXY_ERROR', False)
            # set_global('SLEEP_TIME', USER_SLEEP_TIME)
            if USE_BOT:
                log('Reloading bot')
                get_global('BOT').close()
                set_global(
                    'BOT',
                    Browser(headless=HEADLESS,
                            proxy=proxy,
                            driverpath=WEBDRIVERPATH))
            return
            #acceptable = check_connection()
            #if acceptable:
            #    log(f'using proxy {proxy}')
            #    set_global('PROXIES', proxies)
            #    return
            #else:
            #    save_data('bad_proxies.txt', proxy, end=',')
            #    set_global('PROXY', old_proxy)
            #    set_global('PROXY_ERROR', True)
        except Exception as e:
            log(e)
            change_proxy()
    raise ProxyError('All proxies have been used.')
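
The load_data/save_data pair used by change_proxy is not shown. A minimal sketch, inferred from the calls above (load_data returns the file contents as a string; save_data appends a value followed by the given separator, e.g. a bad proxy plus ','):

# Hypothetical sketch of the file helpers used by change_proxy; the
# append-with-separator behaviour is inferred from save_data(..., end=',').
import os

def load_data(path: str) -> str:
    """Return the file contents, or an empty string if the file is missing."""
    if not os.path.exists(path):
        return ''
    with open(path, 'r', encoding='utf-8') as f:
        return f.read()

def save_data(path: str, value: str, end: str = '\n') -> None:
    """Append the value plus the separator, e.g. '1.2.3.4:8080,' for a bad proxy."""
    with open(path, 'a', encoding='utf-8') as f:
        f.write(value + end)
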
Example #54
0
def createInputCSV(start, end, filename):
    """Creates data for a range of numbers and inserts it into a file.

    Parameters
    ----------
        start : int
            Starting index for the data to be created

        end : int
            Last position for the data to be created

        filename : string
            Name of the file to store data into

    """

    # Plain Python lists are enough to accumulate the inputs and their labels.
    inputData = []
    outputData = []

    # Build the training pairs: each integer and its FizzBuzz label.
    for i in range(start, end):
        inputData.append(i)
        outputData.append(FizzBuzz().func(i))

    # Why a DataFrame? DataFrames are pandas objects, essentially an enhanced
    # version of NumPy structured arrays: multidimensional arrays with attached
    # row and column labels, plus support for heterogeneous types and missing data.
    dataset = {}
    dataset["input"] = inputData
    dataset["label"] = outputData

    save_data(dataset, filename)

    print(filename, "Created!")
Example #55
0
def main():
  args = parse_args()
  initial = False
  if args.ini:
    initial = True
  data_train = load_data(True, initial)
  data_test = load_data(False, initial)

  cv = None
  tfv = None
  tfv_matrix = None
  all_questions = pd.concat([data_train.question1, data_train.question2, data_test.question1, data_test.question2]).unique()
  svd = None

  if args.rec:
    if args.cnt:
      cv, _ = get_count_vectorizer(all_questions)
      pickle.dump(cv, open(PATH_COUNT_VECTORIZER, 'wb'))
      print("count vect dumped and updated")
    if args.tf:
      tfv, tfv_matrix = get_tfidf_vectorizer(all_questions)
      pickle.dump(tfv, open(PATH_TFIDF_VECTORIZER, 'wb'))
      pickle.dump(tfv_matrix, open(PATH_TFIDF_MATRIX, 'wb'))
      print("tfidf vectorizer dumped and updated")
  if args.enr:
    if args.cnt:
      if not args.rec:
        cv  = pickle.load(open(PATH_COUNT_VECTORIZER, "rb"))
      analyzer = cv.build_analyzer()
      if not args.fin:
        data_train['question1_tk'] = data_train.question1.apply(lambda x: analyzer(x))
        data_train['question2_tk'] = data_train.question2.apply(lambda x: analyzer(x))
        save_data(data_train)
      else:
        data_test['question1_tk'] = data_test.question1.apply(lambda x: analyzer(x))
        data_test['question2_tk'] = data_test.question2.apply(lambda x: analyzer(x))
        save_data(data_test, False)
      print("data updated for count_vectorizer features")
def run():
    # load in members, orient by bioguide ID
    print("Loading current legislators...")
    current = load_data("legislators-current.yaml")

    current_bioguide = {}
    for m in current:
        if "bioguide" in m["id"]:
            current_bioguide[m["id"]["bioguide"]] = m

    # go over current members, remove out-of-office people
    membership_current = load_data("committee-membership-current.yaml")
    for committee_id in list(membership_current.keys()):
        print("[%s] Looking through members..." % committee_id)

        # iterate over a copy so that removing entries does not skip members
        for member in list(membership_current[committee_id]):
            if member["bioguide"] not in current_bioguide:
                print("\t[%s] Ding ding ding! (%s)" %
                      (member["bioguide"], member["name"]))
                membership_current[committee_id].remove(member)

    print("Saving current memberships...")
    save_data(membership_current, "committee-membership-current.yaml")
Example #57
0
def main():
    main_df = load_data(CSV_DIR, CSV_FILE)
    main_df = split_dataset(main_df)
    train_df = extract_and_expand_subset(main_df, 'train')
    save_data(train_df, CSV_DIR, TRAIN_CSV_FILE)
    val_df = extract_and_expand_subset(main_df, 'val')
    save_data(val_df, CSV_DIR, VAL_CSV_FILE)
    test_df = extract_and_expand_subset(main_df, 'test')
    save_data(test_df, CSV_DIR, TEST_CSV_FILE)
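
The load_data/save_data helpers here take a directory and a file name. A minimal sketch assuming they are thin pandas wrappers:

# Hypothetical sketch of the CSV helpers used above; the pandas wrappers are
# an assumption based on the (directory, filename) call signature.
import os
import pandas as pd

def load_data(csv_dir, csv_file):
    return pd.read_csv(os.path.join(csv_dir, csv_file))

def save_data(df, csv_dir, csv_file):
    df.to_csv(os.path.join(csv_dir, csv_file), index=False)
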
Example #58
0
def run():
	if len(sys.argv) != 3:
		print("Usage:")
		print("python retire.py bioguideID termEndDate")
		sys.exit()

	try:
		utils.parse_date(sys.argv[2])
	except:
		print("Invalid date: ", sys.argv[2])
		sys.exit()

	print("Loading current YAML...")
	y = utils.load_data("legislators-current.yaml")
	print("Loading historical YAML...")
	y1 = utils.load_data("legislators-historical.yaml")

	for moc in y:
		if moc["id"].get("bioguide", None) != sys.argv[1]: continue

		print("Updating:")
		rtyaml.pprint(moc["id"])
		print()
		rtyaml.pprint(moc["name"])
		print()
		rtyaml.pprint(moc["terms"][-1])

		moc["terms"][-1]["end"] = sys.argv[2]

		y.remove(moc)
		y1.append(moc)

		break

	print("Saving changes...")
	utils.save_data(y, "legislators-current.yaml")
	utils.save_data(y1, "legislators-historical.yaml")
Example #59
0
def run():

    if len(sys.argv) != 2:
        print("Usage:")
        print("python untire.py bioguideID")
        sys.exit()

    print("Loading current YAML...")
    y = utils.load_data("legislators-current.yaml")
    print("Loading historical YAML...")
    y1 = utils.load_data("legislators-historical.yaml")

    for moc in y1:
        if moc["id"].get("bioguide", None) != sys.argv[1]: continue

        print("Updating:")
        rtyaml.pprint(moc["id"])
        print()
        rtyaml.pprint(moc["name"])

        moc["terms"].append(
            OrderedDict([
                ("type", moc["terms"][-1]["type"]),
                ("start", None),
                ("end", None),
                ("state", moc["terms"][-1]["state"]),
                ("party", moc["terms"][-1]["party"]),
            ]))

        y1.remove(moc)
        y.append(moc)

        break

    print("Saving changes...")
    utils.save_data(y, "legislators-current.yaml")
    utils.save_data(y1, "legislators-historical.yaml")
Example #60
0
def upload():
    """Accept data to be uploaded. Either JSON format generic data,
       or thumbnails as jpegs from camera
        request.form:
            id (str): Identifier name of camera
            token (str): Secret token to allow uploads
            type (str): "data" for json upload, "small" or "crop" for JPGs
        Returns:
            None
    """
    if request.method == "POST":
        id = request.form["id"]
        token = request.form["token"]
        upload_type = request.form["type"]
        if token != app.config["UPLOAD_TOKEN"]:
            return "", 401
        if upload_type == "data":
            s_filename = safe_name("{}-{}-{}.json".format(
                id, upload_type, int(time.time())))
            filename = os.path.join(app.config["UPLOAD_PATH"], "data",
                                    s_filename)
            logging.info("Saving file {}".format(s_filename))
            save_data(request.form["data"], filename)
            set_rights(filename)

        if upload_type in ("small", "crop"):
            file = request.files["file"]
            if file:
                s_filename = safe_name("{}-{}.jpg".format(id, upload_type))
                filename = os.path.join(app.config["UPLOAD_PATH"], "view",
                                        s_filename)
                logging.info("Saving file {}".format(s_filename))
                file.save(filename)
                set_rights(filename)
                return "", 200

    return "", 200