def main():
    regions = settings.REGIONS  # Get regions from config
    clean_results = {}
    if regions[0] == "global":  # If they want you to clean global resources
        print("Cleaning Global Resources.")
        helpers.set_session()  # Set a blank session for global resources
        clean_results["global"] = clean_account_globally()
        print("Finished Cleaning Globally.")
        regions.pop(0)
    for region in regions:
        print("Cleaning In {0}.".format(region))
        helpers.set_session(region)  # Set current region
        clean_results[region] = clean_account_regionally()  # Clean current region's resources
        print("Finished Cleaning In {0}.".format(region))
    helpers.save_results(clean_results)
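# The call to helpers.save_results(clean_results) above receives a single dict of
# per-region results. The project's actual helper isn't shown here; a minimal
# sketch of a compatible implementation, assuming a JSON dump to a hypothetical
# results.json path, could look like this:
import json

def save_results(results, path="results.json"):
    # Serialize the {region: cleanup_result} mapping so a later run can inspect it.
    with open(path, "w") as fh:
        json.dump(results, fh, indent=2, default=str)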
def run_session(options, args):
    """
    Runs the training simulation given a parsed set of options and its
    leftover elements.

    Returns a dictionary of {key: value} pairings with information on the
    results of the simulation.
    """
    # leftover args are class names for training methodology
    if len(args) == 0:
        # default
        learners_to_run = ['Learner']
    else:
        learners_to_run = parse_learners(args)

    options.learner_class_names = learners_to_run
    options.learner_classes = load_modules(options.learner_class_names)
    n = len(learners_to_run)
    options.video = options.train_tick > 0

    learner_histories = {}
    taught_learners = {}
    learner_scores = {}

    # train each class and store results
    for learner, learner_class in options.learner_classes.iteritems():
        hist, learned = session(learner, options)
        learner_histories[learner] = hist
        taught_learners[learner] = learned
        learner_scores[learner] = get_score(
            hist,
            options.test_iters if options.test_iters != 0 else min(
                100, options.train_iters))

    # TODO: Here, we have access to each learner's training history as well
    # as the trained learner. Should do stuff with it.

    # create a plot class for each learner
    plots = [
        Plots(learner_histories[learner], learner)
        for learner in options.learner_class_names
    ]

    # generate plots for each learner
    learner_plots = {}
    for plot in plots:
        learner_plots[plot.learner] = list(plot.plot_score_by_epoch())
        dist = plot.plot_distribution(options.test_iters)
        if dist is not None:
            learner_plots[plot.learner].append(dist)

    # save results if possible
    if not helpers.save_results(options.learner_class_names, taught_learners,
                                learner_histories, learner_scores,
                                learner_plots, options.outfile):
        print "Failed to save results."
def start(self):
    delimiter()
    print(f"Starting experiment {self.experiments_name}...")
    delimiter()
    for fold in self.folds:
        for target in self.targets:
            print(f'Processing fold: {fold} and target: {target}')
            delimiter()
            train_input_indices, train_output, val_input_indices, val_output, test_input_indices, test_output = \
                self.data_frame.get_train_val_test_input_output(
                    target, fold, self.validation_set_percentage, self.random_state,
                    self.targets_type, self.prediction_type)
            model, models_identifier, parameters_dict = self.get_the_best_model(
                target, fold, train_input_indices, train_output, val_input_indices, val_output)

            print(f'Apply best model to test for {target} on fold {fold}.')
            test_loader = self.data_frame.create_minibatches(
                test_input_indices, test_output, 1, self.cuda_device, model.convert_input)
            regularization = get_regularization(parameters_dict[self.REGULARIZATION])
            alpha = parameters_dict[self.ALPHA]
            test_loss, test_logits, test_true = apply(
                self.debugger, model, self.loss_function, test_loader,
                regularization, alpha, self.cuda_device)

            results = model.models_metrics(test_logits, test_true)
            self.metrics_handler.update_test_results(models_identifier, results)
            self.metrics_handler.print_test_results(test_loss, results)
            helpers.save_results(
                test_logits, test_true,
                helpers.get_predictions_file_name(self.output_directory, models_identifier,
                                                  target, fold, self.run_identificator),
                self.prediction_type)
            self.models_performance_saver.flush_data()
            print(f"+++ Finished with training and testing model for {target} on fold {fold}. +++")
            delimiter()
    delimiter()
print_line = "\rEpoch: {} Train loss: {:.6f} Val accuracy: {:.4f}% Loss: {:.6f} Minutes: {:.2f}{}".format( epoch + 1, loss_train, acc_val * 100, loss_val, epoch_time, " (improved)" if loss_val < best_loss_val else "") print(print_line) sys.__stdout__.write(print_line) # And save the model if it improved: if loss_val < best_loss_val: save_path = saver.save(sess, checkpoint_path) best_loss_val = loss_val # ## Plot loss and accuracy # In[56]: save_results(validation_accuracy_values, training_loss_values_final, validation_loss_values, model_name) # ## Run network with test data # In[57]: batch_size = 100 test_images = test_images_original - mean test_images /= std test_labels = test_labels_original n_iterations_test = len(test_images) // batch_size checkpoint_path = model_name
def read_csv(source):
    #for reading unicode
    #f = codecs.open(source, 'r', encoding='utf-8')

    city_options = City.objects.filter(tag="ann_arbor")
    print len(city_options)
    if not len(city_options):
        city = City()
        city.name = "Ann Arbor"
        city.tag = to_tag(city.name)
        city.save()
    else:
        city = city_options[0]

    print city

    #TODO:
    #setup FeedInfo item
    #and also create a Source item

    permit_sub_types = []
    status_types = []
    building_nums = []
    applicants = []
    managers = []

    cache_file = "%s.json" % city.tag
    cache_destination = os.path.join(os.path.dirname(source), cache_file)
    #keep a local copy of data we've processed...
    #this should help with subsequent calls
    #to make sure we don't need to duplicate calls to remote geolocation APIs:
    local_cache = load_json(cache_destination, create=True)
    if not local_cache.has_key('buildings'):
        local_cache['buildings'] = {}
    if not local_cache.has_key('parcels'):
        local_cache['parcels'] = {}

    locations = {}
    for key, value in local_cache['buildings'].items():
        locations[key] = Location(value)

    #geocoder helper:
    geo = Geo()

    #with open('eggs.csv', 'rb') as csvfile:
    with codecs.open(source, 'rb', encoding='utf-8') as csvfile:
        #reader = csv.reader(csvfile, delimiter=' ', quotechar='|')
        reader = csv.reader(csvfile)

        #just print the first row:
        print '>, <'.join(reader.next())

        count = 0
        for row in reader:
            count += 1
            #could exit out early here, if needed
            if count > 10:
                pass

            print row

            #type of building (eg: sf attached, duplex, etc)
            permit_id = row[0]

            #should always be "RENTAL" (don't need to track this one)
            permit_type = row[1]
            if not permit_type == "RENTAL" and not permit_type == "MECHANICAL":
                raise ValueError, "Unexpected permit type: %s in row: %s" % (
                    permit_type, row)

            sub_type = row[2]

            #can use this to filter out non-rental or obsolete entries
            #don't need to track otherwise:
            status = row[3]
            parcel_id = row[4]
            address = row[5]

            #should be fixed per source:
            city = row[6]
            if not ((city.lower() == 'ann arbor') or (city == '')):
                raise ValueError, "Unexpected city: %s" % (city)

            sqft = row[7]
            number_of_buildings = row[8]
            applicant_name = row[9]
            number_of_stories = row[10]
            number_of_units = row[11]

            if (not status in ['EXPIRED', 'CLOSED']) and (permit_type in ['RENTAL']):
                #check if we've started processing any results for this row
                #if local_cache['buildings'].has_key(address.upper()):
                #    local_cache_cur = local_cache['buildings'][address.upper()]
                #else:
                #    local_cache_cur = {}

                if locations.has_key(address.upper()):
                    location = locations[address.upper()]
                else:
                    location = Location()

                #do some geocoding, as needed:
                search = "%s, Ann Arbor MI" % address.upper()

                for source in location.sources:
                    geo.lookup(search, source, location)

                location.address_alt = search

                locations[address.upper()] = location
                #local_cache['buildings'][address.upper()] = local_cache_cur

                #and check if a previous building object in the db exists
                #CREATE A NEW BUILDING OBJECT HERE
                #cur_building = Building()
                bldg = Building()

                bldg.type = sub_type

                #back it up for later
                local_cache['buildings'] = {}
                for key, value in locations.items():
                    local_cache['buildings'][key] = value.to_dict()
                save_json(cache_destination, local_cache)

                #exit()

            #THE FOLLOWING ARE FOR INFORMATIONAL PURPOSES ONLY
            #(to see what data is available)
            if not status in status_types:
                #print "adding: %s" % sub_type
                status_types.append(status)

            if not sub_type in permit_sub_types:
                #print "adding: %s" % sub_type
                permit_sub_types.append(sub_type)

            building_num = row[8]
            if not building_num in building_nums:
                #print "adding: %s" % sub_type
                building_nums.append(building_num)

            applicant = row[9]
            if (re.search('MGMT', applicant) or
                    re.search('REALTY', applicant) or
                    re.search('PROPERTIES', applicant) or
                    re.search('MANAGEMENT', applicant) or
                    re.search('GROUP', applicant) or
                    re.search('LLC', applicant) or
                    re.search('L.L.C.', applicant) or
                    re.search('INC', applicant)):
                if not applicant in managers:
                    managers.append(applicant)
            else:
                if not applicant in applicants:
                    applicants.append(applicant)

            #print ', '.join(row)
            #print

    ## print permit_sub_types
    print status_types
    print building_nums

    save_results(locations)
def read_csv(source_csv, city_name, city_tag):
    city_options = City.objects.filter(tag=city_tag)
    print "Number of cities available: %s" % len(city_options)
    if not len(city_options):
        raise ValueError, "CITY NOT FOUND! run make_cities.py first"
        ## city = City()
        ## city.name = city_name
        ## city.tag = to_tag(city.name)
        ## city.save()
    else:
        city = city_options[0]

    print city

    feed_date = "2013-07-31"

    feeds = FeedInfo.objects.filter(city=city).filter(added=feed_date)
    if feeds.exists():
        feed = feeds[0]
        print "Already had feed: %s, %s" % (feed.city, feed.added)
    else:
        feed = FeedInfo()
        feed.city = city
        feed.added = feed_date
        feed.version = "0.1"
        feed.save()
        print "Created new feed: %s" % feed.city.name

    people = Person.objects.filter(name="Blank")
    if people.exists():
        person = people[0]
        print "Already had person: %s" % (person.name)
    else:
        person = Person()
        person.name = "Blank"
        person.save()
        print "Created new person: %s" % person.name

    sources = Source.objects.filter(feed=feed)
    if sources.exists():
        feed_source = sources[0]
        print "Already had source: %s, %s" % (feed_source.feed.city, feed_source.feed.added)
    else:
        feed_source = Source()
        feed_source.feed = feed
        feed_source.person = person
        feed_source.save()
        print "Created new source: %s" % feed_source.feed.city.name

    cache_file = "%s.json" % city.tag
    cache_destination = os.path.join(os.path.dirname(source_csv), cache_file)
    #keep a local copy of data we've processed...
    #this should help with subsequent calls
    #to make sure we don't need to duplicate calls to remote geolocation APIs:
    local_cache = load_json(cache_destination, create=True)
    if not local_cache.has_key('buildings'):
        local_cache['buildings'] = {}
    if not local_cache.has_key('parcels'):
        local_cache['parcels'] = {}

    locations = {}
    for key, value in local_cache['buildings'].items():
        locations[key] = Location(value)

    #geocoder helper:
    geo = Geo()

    skips = 0
    #with codecs.open(source_csv, 'rb', encoding='utf-8') as csvfile:
    with open(source_csv) as csvfile:
        #reader = csv.reader(csvfile, delimiter=' ', quotechar='|')
        #reader = csv.reader(csvfile)
        #reader = unicodecsv.UnicodeReader(csvfile, encoding='utf-8')
        reader = unicode_csv_reader(csvfile)

        #just print the first row:
        print '>, <'.join(reader.next())

        count = 0
        for row in reader:
            count += 1
            print "Looking at row: %s" % count
            #could exit out early here, if needed
            if count > 1000:
                #exit()
                pass

            address = row[0]
            #need to fix the number being at the end of the address
            parts = address.split(',')
            anumber = parts[-1]
            parts = parts[:-1]
            street = ",".join(parts)
            address = "%s %s" % (anumber, street)

            invoice_number = row[1]
            bldg_id = row[1]
            print bldg_id

            #this is where owner is stored
            invoice_note = row[6]
            print invoice_note
            if re.match('Sent to:', invoice_note):
                print "changing invoice note from: %s" % invoice_note
                invoice_note = invoice_note[8:]
                print "to: %s" % invoice_note
            else:
                #raise ValueError, "invoice note does not start with Sent to"
                print "!!!!!invoice note does not start with Sent to!!!!!"
                print ""
                print ""

            no_units = row[12]

            ## #should always be "RENTAL" (don't need to track this one)
            ## permit_type = row[1]
            ## if not permit_type == "RENTAL" and not permit_type == "MECHANICAL":
            ##     raise ValueError, "Unexpected permit type: %s in row: %s" % (
            ##         permit_type, row)
            ## bldg_type = row[2]
            ## #can use this to filter out non-rental or obsolete entries
            ## #don't need to track otherwise:
            ## status = row[3]
            ## parcel_id = row[4]
            ## #should be fixed per source:
            ## ss_city = row[6]
            ## bldg_sf = row[7]
            ## no_bldgs = row[8]
            ## applicant_name = row[9]
            ## no_stories = row[10]
            ## no_units = row[11]
            ## if not ( (ss_city.lower() == city_name.lower()) or (ss_city == '') ):
            ##     raise ValueError, "Unexpected city: %s" % (ss_city)
            ## sqft = row[7]
            ## number_of_buildings = row[8]
            ## applicant_name = row[9]
            ## number_of_stories = row[10]
            ## number_of_units = row[11]

            #check if this is one we want to skip
            if conversions.has_key(address.upper()):
                address = conversions[address.upper()]

            ## if (not status in ['EXPIRED', 'CLOSED']) and (permit_type in ['RENTAL']):

            #make sure it's not one we're skipping:
            if not address:
                print "SKIPPING ITEM: %s" % row[1]
                skips += 1
            else:
                #check if we've started processing any results for this row
                if locations.has_key(address.upper()):
                    location = locations[address.upper()]
                else:
                    location = Location()

                #temporarily just want to look at google again
                #location.sources = ["google"]
                #location.sources = ["google", "bing", "usgeo", "geonames", "openmq"]
                location.sources = ["google", "bing"]

                #do some geocoding, as needed:
                search = "%s, %s, %s" % (address.upper(), city_name, city.state)

                any_updated = False
                for geo_source in location.sources:
                    update = geo.lookup(search, geo_source, location, force=True)
                    #update = geo.lookup(search, geo_source, location, force=False)
                    if update:
                        any_updated = True

                location.sources = ["google", "bing", "usgeo", "geonames", "openmq", "mq"]

                #this is the case for brand new searches
                #(which are updated in a different sense)
                if not hasattr(location, "address_alt") or not location.address_alt:
                    any_updated = True

                location.address_alt = search
                #location.bldg_units = bldg_units
                #location.units_bdrms = units_bdrms
                locations[address.upper()] = location

                #handle the database storage
                bldg = make_building(location, bldg_id, city, feed_source, no_units=no_units)

                if invoice_note:
                    (person, bldg_person) = make_person(invoice_note, bldg, "Permit Applicant")

                if any_updated:
                    #back it up for later
                    #enable this when downloading GPS coordinates...
                    #the rest of the time it slows things down
                    local_cache['buildings'] = {}
                    for key, value in locations.items():
                        local_cache['buildings'][key] = value.to_dict()
                    save_json(cache_destination, local_cache)

            print

    destination = '%s.tsv' % city_tag
    save_results(locations, destination)
frequencies = calculate_frequency(EOD, sampling_frequency,
                                  estimated_frequency=f_estimate,
                                  crossing_threshold=threshold,
                                  temporal_threshold=0.05,
                                  method='median', ascending=False)
cv = '{:.2e}'.format(np.std(frequencies) / np.mean(frequencies))
print(cv)
print(np.mean(frequencies))
file_name = helpers.path_to_name(file)
frequency_info = [frequencies, cv, threshold]
helpers.save_results(frequency_info, fish, file_name, 'frequency')

npy_files = helpers.get_npy_files(fish, helpers.SAVE_PATH, 'frequency')
frequencies = []
for i in range(len(npy_files)):
    [frequency, cv, threshold] = helpers.load_npy(npy_files[i])
    frequency = frequency.tolist()
    frequencies.extend(frequency)

size = np.size(frequencies) / len(npy_files)
markers = [size * i for i in range(len(npy_files))]
plt.plot(frequencies, '.')
frequencies = calculate_frequency(EOD, sampling_frequency,
                                  estimated_frequency=f_estimate,
                                  crossing_threshold=threshold,
                                  temporal_threshold=0.05,
                                  method='median', ascending=True)
cv = '{:.2e}'.format(np.std(frequencies) / np.mean(frequencies))
print(cv)
print(np.mean(frequencies))
file_name = helpers.path_to_name(file)
frequency_info = [frequencies, cv, threshold]
helpers.save_results(frequency_info, fish, file_name, 'frequency')
fft = [xf, power]
helpers.save_results(fft, fish, file_name + '_fft', 'fft')

for index in lowrez:
    file = mat_files[index]
    print(file)
    raw_data = helpers.load_mat(file)
    signal_length = len(raw_data)
    EOD = helpers.cleaning_data(raw_data)
    sampling_frequency = 500000
    threshold = np.max(EOD) / 3
    f_estimate = 800
    frequencies = calculate_frequency(EOD, sampling_frequency,
                                      estimated_frequency=f_estimate,
def read_csv(source_csv, city_name, city_tag):
    city_options = City.objects.filter(tag=city_tag)
    print "Number of cities available: %s" % len(city_options)
    if not len(city_options):
        raise ValueError, "CITY NOT FOUND! run make_cities.py first"
        ## city = City()
        ## city.name = city_name
        ## city.tag = to_tag(city.name)
        ## city.save()
    else:
        city = city_options[0]

    print city

    feed_date = "2013-10-16"

    feeds = FeedInfo.objects.filter(city=city).filter(added=feed_date)
    if feeds.exists():
        feed = feeds[0]
        print "Already had feed: %s, %s" % (feed.city, feed.added)
    else:
        feed = FeedInfo()
        feed.city = city
        feed.added = feed_date
        feed.version = "0.1"
        feed.save()
        print "Created new feed: %s" % feed.city.name

    people = Person.objects.filter(name="Blank")
    if people.exists():
        person = people[0]
        print "Already had person: %s" % (person.name)
    else:
        person = Person()
        person.name = "Blank"
        person.save()
        print "Created new person: %s" % person.name

    sources = Source.objects.filter(feed=feed)
    if sources.exists():
        feed_source = sources[0]
        print "Already had source: %s, %s" % (feed_source.feed.city, feed_source.feed.added)
    else:
        feed_source = Source()
        feed_source.feed = feed
        feed_source.person = person
        feed_source.save()
        print "Created new source: %s" % feed_source.feed.city.name

    cache_file = "%s.json" % city.tag
    cache_destination = os.path.join(os.path.dirname(source_csv), cache_file)
    #keep a local copy of data we've processed...
    #this should help with subsequent calls
    #to make sure we don't need to duplicate calls to remote geolocation APIs:
    local_cache = load_json(cache_destination, create=True)
    if not local_cache.has_key('buildings'):
        local_cache['buildings'] = {}
    if not local_cache.has_key('parcels'):
        local_cache['parcels'] = {}

    locations = {}
    for key, value in local_cache['buildings'].items():
        locations[key] = Location(value)

    #geocoder helper:
    geo = Geo()

    skips = 0
    #with codecs.open(source_csv, 'rb', encoding='utf-8') as csvfile:
    with open(source_csv) as csvfile:
        #reader = csv.reader(csvfile, delimiter=' ', quotechar='|')
        #reader = csv.reader(csvfile)
        #reader = unicodecsv.UnicodeReader(csvfile, encoding='utf-8')
        reader = unicode_csv_reader(csvfile)

        #just print the first row:
        print '>, <'.join(reader.next())

        count = 0

        #want to randomize the order... distribute options more evenly
        #print len(reader)
        #exit()
        #in order to randomize, should randomize the order in the csv
        for row in reader:
            count += 1
            print "Looking at row: %s" % count
            #could exit out early here, if needed
            if count > 10:
                #exit()
                pass

            print row

            address = row[0]
            ## no_units = row[12]

            #can pass this in as bldg_id to make_building
            #that gets used for parcel too
            parcel_id = row[1]
            bldg_id = parcel_id

            street_num = row[2]
            street_dir = row[3]
            street_name = row[4]
            street_sfx = row[5]
            #eg building number
            qualifier_pre = row[6]
            #eg "UNIT" or "APT"
            qualifier_post = row[7]
            apt_num = row[8]
            #skip row9 (in/out... whatever that means)
            zip_code = row[10]
            #skip row11, assessor id
            #skip row12, address num
            #skip row13, x
            #skip row14, y
            #xcoord == lng
            lng = row[15]
            lat = row[16]
            #entry floor number: (named 'z' in sheet)
            floor = row[17]
            #skip row18, strcid... not sure
            #skip row19, parent
            #skip row20, app_
            #skip row21, hteloc
            zone = row[22]
            bldg_type = row[23]
            #number of buildings
            bldg_num = row[24]
            no_units = row[25]
            #skip row[26], inspection type
            #skip row27, app number
            #skip row28, date received
            #skip row29, application type
            #skip row30, ownerid
            #skip row31, operator id
            #skip row32, agent_id
            #skip row33, mail to
            central_heat = row[34]
            if central_heat == 'Y':
                central_heat = True
            else:
                central_heat = False

            #heat mechanism? heat mechanic??? not sure
            heat_mech = row[35]
            #skip row36, agent id (2)
            #skip row37, agent last name
            #skip row38 agent first name
            #skip row39 agent middle initial
            #skip row40, agent title
            #skip row41, business name

            #could be owner, could be agent
            owner_name = row[42]
            owner_address1 = row[43]
            owner_address2 = row[44]
            owner_city = row[45]
            owner_state = row[46]
            owner_zip = row[47]

            #address = " ".join([street_num, street_dir, street_name, street_sfx, qualifier_pre, qualifier_post, apt_num])
            address_main = " ".join([street_num, street_dir, street_name, street_sfx, qualifier_pre])
            address_main = address_main.strip()
            #get rid of any double spaces
            address_main = address_main.replace("  ", " ")

            apt_main = " ".join([qualifier_post, apt_num])
            apt_main = apt_main.strip()

            address = address_main
            print address

            owner_address = ", ".join([owner_address1, owner_address2, owner_city, owner_state, owner_zip])

            ## #should always be "RENTAL" (don't need to track this one)
            ## permit_type = row[1]
            ## if not permit_type == "RENTAL" and not permit_type == "MECHANICAL":
            ##     raise ValueError, "Unexpected permit type: %s in row: %s" % (
            ##         permit_type, row)
            ## bldg_type = row[2]
            ## #can use this to filter out non-rental or obsolete entries
            ## #don't need to track otherwise:
            ## status = row[3]
            ## parcel_id = row[4]
            ## #should be fixed per source:
            ## ss_city = row[6]
            ## bldg_sf = row[7]
            ## no_bldgs = row[8]
            ## applicant_name = row[9]
            ## no_stories = row[10]
            ## no_units = row[11]
            ## sqft = row[7]
            ## number_of_buildings = row[8]
            ## applicant_name = row[9]
            ## number_of_stories = row[10]
            ## number_of_units = row[11]

            #check if this is one we want to skip
            if conversions.has_key(address.upper()):
                address = conversions[address.upper()]

            ## if (not status in ['EXPIRED', 'CLOSED']) and (permit_type in ['RENTAL']):

            #make sure it's not one we're skipping:
            if not address:
                print "SKIPPING ITEM: %s" % row[1]
                skips += 1
            else:
                #check if we've started processing any results for this row
                if locations.has_key(address.upper()):
                    location = locations[address.upper()]
                else:
                    location = Location()

                #temporarily just want to look at google again
                #location.sources = ["google"]
                #location.sources = ["google", "bing"]
                #location.sources = ["google", "bing", "usgeo", "geonames", "openmq"]

                #skip geocoding for columbia
                location.sources = []

                #do some geocoding, as needed:
                search = "%s, %s, %s" % (address.upper(), city_name, city.state)

                any_updated = False
                for geo_source in location.sources:
                    update = geo.lookup(search, geo_source, location, force=True)
                    #update = geo.lookup(search, geo_source, location, force=False)
                    if update:
                        any_updated = True

                location.sources = ['csv', "google", "bing", "usgeo", "geonames", "openmq", "mq"]

                #manually add data from csv here:
                result = []
                result.append({'place': address, 'lat': lat, 'lng': lng})
                setattr(location, 'csv', result)

                #this is the case for brand new searches
                #(which are updated in a different sense)
                if not hasattr(location, "address_alt") or not location.address_alt:
                    any_updated = True

                location.address_alt = search
                #location.bldg_units = bldg_units
                #location.units_bdrms = units_bdrms
                locations[address.upper()] = location

                #handle the database storage
                bldg = make_building(location, bldg_id, city, feed_source,
                                     no_units=no_units, bldg_type=bldg_type)

                if apt_main:
                    unit = make_unit(apt_main, bldg)

                (person, bldg_person) = make_person(owner_name, bldg, "Agent", address=owner_address)

                if any_updated:
                    #back it up for later
                    #enable this when downloading GPS coordinates...
                    #the rest of the time it slows things down
                    local_cache['buildings'] = {}
                    for key, value in locations.items():
                        local_cache['buildings'][key] = value.to_dict()
                    save_json(cache_destination, local_cache)

            print

    #exit()

    destination = '%s.tsv' % city_tag
    save_results(locations, destination)
import helpers
import numpy as np
from tqdm import tqdm

fish_names = helpers.get_all_fish(helpers.RECORDING_PATH8)
indexes = []
fish = fish_names[1]
for fish in tqdm(fish_names):
    npy_files = helpers.get_high_frequency_files(fish, helpers.RECORDING_PATH8)
    file = npy_files[0]
    for file in tqdm(npy_files):
        data = helpers.load_npy(file)
        data = np.array(data, dtype='int')
        sampling_frequency = helpers.NPY_FREQUENCY
        EOD = data - np.mean(data)
        t_max = int(len(data) / helpers.NPY_FREQUENCY)
        time_array = np.arange(0, t_max - 1 / sampling_frequency, 1 / sampling_frequency)

        # IMPLEMENT IN HELPERS
        threshold = max(data) / 2
        fest = calculate_frequency(EOD[::20], sampling_frequency / 20, method='spectral')
        frequencies = calculate_frequency(EOD, sampling_frequency,
                                          estimated_frequency=fest[0],
                                          crossing_threshold=threshold / 2,
                                          method='median', ascending=True)
        cv = '{:.2e}'.format(np.std(frequencies) / np.mean(frequencies))
        file_name = helpers.path_to_name(file)
        helpers.save_results(frequencies, fish, file_name)
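# The "IMPLEMENT IN HELPERS" note above refers to the ad-hoc crossing threshold
# (max(data) / 2). A minimal sketch of such a helper, as an assumption of what it
# might look like rather than the project's actual code:
import numpy as np

def estimate_crossing_threshold(data, fraction=0.5):
    # Use a fixed fraction of the peak amplitude as the zero-crossing threshold.
    return float(np.max(data)) * fraction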
def read_csv(source_csv):
    city_options = City.objects.filter(tag="bloomington_in")
    print "Number of cities available: %s" % len(city_options)
    if not len(city_options):
        raise ValueError, "CITY NOT FOUND! run make_cities.py first"
        ## city = City()
        ## city.name = "Bloomington"
        ## city.tag = to_tag(city.name)
        ## city.save()
    else:
        city = city_options[0]

    print city

    feed_date = "2013-08-29"

    feeds = FeedInfo.objects.filter(city=city).filter(added=feed_date)
    if feeds.exists():
        feed = feeds[0]
        print "Already had feed: %s, %s" % (feed.city, feed.added)
    else:
        feed = FeedInfo()
        feed.city = city
        feed.added = feed_date
        feed.version = "0.1"
        feed.save()
        print "Created new feed: %s" % feed.city

    people = Person.objects.filter(name="Blank")
    if people.exists():
        person = people[0]
        print "Already had person: %s" % (person.name)
    else:
        person = Person()
        person.name = "Blank"
        person.save()
        print "Created new person: %s" % person.name

    sources = Source.objects.filter(feed=feed)
    if sources.exists():
        feed_source = sources[0]
        print "Already had source: %s, %s" % (feed_source.feed.city, feed_source.feed.added)
    else:
        feed_source = Source()
        feed_source.feed = feed
        feed_source.person = person
        feed_source.save()
        print "Created new source: %s" % feed_source.feed.city

    cache_file = "%s.json" % city.tag
    cache_destination = os.path.join(os.path.dirname(source_csv), cache_file)
    # keep a local copy of data we've processed...
    # this should help with subsequent calls
    # to make sure we don't need to duplicate calls to remote geolocation APIs:
    local_cache = load_json(cache_destination, create=True)
    if not local_cache.has_key("buildings"):
        local_cache["buildings"] = {}
    if not local_cache.has_key("parcels"):
        local_cache["parcels"] = {}

    locations = {}
    for key, value in local_cache["buildings"].items():
        locations[key] = Location(value)

    # geocoder helper:
    geo = Geo()

    skips = 0
    with codecs.open(source_csv, "rb", encoding="utf-8") as csvfile:
        # reader = csv.reader(csvfile, delimiter=' ', quotechar='|')
        reader = csv.reader(csvfile)

        # just print the first row:
        print ">, <".join(reader.next())

        count = 0
        for row in reader:
            count += 1
            print "Looking at row: %s" % count
            # could exit out early here, if needed
            if count > 1000:
                # exit()
                pass

            bldg_id = row[0]
            print bldg_id

            address = row[1]
            print address

            owner = row[2]
            # skip this:
            ownder_contact = row[3]
            agent = row[4]

            bldg_units = row[9]
            print bldg_units

            units_bdrms = row[10]
            print units_bdrms

            # check if this is one we want to skip
            if conversions.has_key(address.upper()):
                address = conversions[address.upper()]

            # make sure it's not one we're skipping:
            if not address:
                print "SKIPPING ITEM: %s" % row[1]
                skips += 1
            else:
                if locations.has_key(address.upper()):
                    location = locations[address.upper()]
                else:
                    location = Location()

                # temporarily just want to look at google again
                location.sources = ["google"]

                # do some geocoding, as needed:
                search = "%s, Bloomington IN" % address.upper()

                any_updated = False
                for geo_source in location.sources:
                    update = geo.lookup(search, geo_source, location, force=True)
                    if update:
                        any_updated = True

                location.sources = ["google", "bing", "usgeo", "geonames", "openmq", "mq"]

                if not hasattr(location, "address_alt") or not location.address_alt:
                    any_updated = True

                location.address_alt = search
                location.bldg_units = bldg_units
                location.units_bdrms = units_bdrms
                locations[address.upper()] = location

                # handle the database storage
                bldg = make_building(location, bldg_id, city, feed_source)

                # owner_details = parse_person(owner)
                if owner:
                    result = special_cases(owner)
                    if result:
                        (owner_name, owner_address) = result
                    else:
                        (owner_name, owner_address, owner_phone, remainder) = parse_person(owner)

                    ## print "owner name: %s" % owner_name
                    ## print "owner address: %s" % owner_address
                    ## print ""

                    if owner_name:
                        (person, bldg_person) = make_person(owner_name, bldg, "Owner", address=owner_address)

                if agent and agent != "No Agent":
                    # agent_details = parse_person(agent)
                    (agent_name, agent_address, agent_phone, remainder) = parse_person(agent)

                    ## print "agent name: %s" % agent_name
                    ## print "agent address: %s" % agent_address
                    ## print ""

                    if agent_name:
                        (person, bldg_person) = make_person(agent_name, bldg, "Agent",
                                                            address=agent_address, city=city)

                if any_updated:
                    # back it up for later
                    # enable this when downloading GPS coordinates...
                    # the rest of the time it slows things down
                    local_cache["buildings"] = {}
                    for key, value in locations.items():
                        local_cache["buildings"][key] = value.to_dict()
                    save_json(cache_destination, local_cache)

            print

    save_results(locations, "bloomington-filtered.tsv")
import helpers
import numpy as np

fish_names = helpers.get_all_fish(helpers.RECORDING_PATH16)
indexes = []
fish = fish_names[6]
sampling_frequency = 500000
step = 12  # 15 seconds
for fish in (fish_names):
    mat_files = helpers.get_mat_files(fish, helpers.RECORDING_PATH16)
    file = mat_files[19]
    for file in mat_files:
        raw_data = helpers.load_mat(file)
        EOD = helpers.cleaning_data(raw_data)
        step_size = np.size(EOD) / step
        i = 0
        for i in range(0, int(step)):
            sub_data = EOD[int(i * step_size):int((i + 1) * step_size)]
            sub_sampling = 1
            sub_EOD = sub_data[::sub_sampling]
            [xf, power] = helpers.compute_fft(sub_EOD, 1, sampling_frequency)
            file_name = helpers.path_to_name(file)
            fft = [xf, power]
            helpers.save_results(fft, fish, file_name + '_fft_' + '{0:03}'.format(i), 'fft')

# asdf = ["a_1", "b_2", "c_10"]
# asdf.sort(key=lambda x: int(x.split("_")[-1]))
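# The commented sort above hints at how the chunked '_fft_000', '_fft_001', ...
# results could be reloaded in numeric order later. A sketch, assuming
# helpers.get_npy_files and helpers.load_npy behave as in the frequency-analysis
# snippets earlier in this collection:
fft_files = helpers.get_npy_files(fish, helpers.SAVE_PATH, 'fft')
fft_files.sort(key=lambda x: int(x.rsplit('_', 1)[-1].split('.')[0]))
chunks = [helpers.load_npy(f) for f in fft_files]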