def main(date_time):
    try:
        count = 0

        for obj in KNACK_PARAMS['REFERENCE_OBJECTS']:
            # get field metadata
            fields = knack_helpers.get_all_fields(obj, KNACK_PARAMS)

            # assign field metadata to 'raw' field name
            field_list = {}

            for field in fields:
                field_list[field['key'] + '_raw'] = field

            # update knack params with list of all field names
            KNACK_PARAMS['FIELD_NAMES'] = knack_helpers.create_label_list(fields)

            # get knack object data
            data = knack_helpers.get_object_data(obj, KNACK_PARAMS)

            # parse data
            parsed = knack_helpers.parse_data(data, field_list, KNACK_PARAMS, convert_to_unix=True, include_ids=True)

            today = date_time.format('YYYY_MM_DD')

            file_name = '{}/{}_{}.csv'.format(BACKUP_DIRECTORY, obj, today)

            try:
                data_helpers.write_csv(parsed, file_name=file_name)

            except Exception as e:
                print(e)
                body = 'Data backup failed when writing csv'
                email_helpers.send_email(secrets['ALERTS_DISTRIBUTION'], 'data backup exception', body)
                raise e

            count += 1

        return count

    except Exception as e:
        print('Failed to process data for {}'.format(date_time))
        print(e)
        body = 'Data backup failed'
        email_helpers.send_email(secrets['ALERTS_DISTRIBUTION'], 'data backup exception', body)
        raise e
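# The backup above formats its timestamp with date_time.format('YYYY_MM_DD'),
# which matches the arrow library's token syntax. A minimal invocation sketch,
# assuming the script is run directly and main() is defined as above; the
# __main__ guard and arrow.now() call are illustrative, not taken from the source.
import arrow

if __name__ == '__main__':
    now = arrow.now()
    count = main(now)
    print('backed up {} objects'.format(count))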
def main(date_time):
    print('starting stuff now')

    try:
        # get field metadata and current data from Knack
        field_list = knack_helpers.get_fields(KNACK_PARAMS)
        knack_data = knack_helpers.get_data(KNACK_PARAMS)
        knack_data = knack_helpers.parse_data(knack_data, field_list, KNACK_PARAMS, require_locations=True, convert_to_unix=True)
        knack_data = data_helpers.stringify_key_values(knack_data)
        knack_data = data_helpers.remove_linebreaks(knack_data, ['LOCATION_NAME'])
        knack_data_mills = data_helpers.unix_to_mills(knack_data)

        # replace the features in the ArcGIS Online service
        token = agol_helpers.get_token(secrets.AGOL_CREDENTIALS)
        agol_payload = agol_helpers.build_payload(knack_data_mills)
        del_response = agol_helpers.delete_features(SERVICE_URL, token)
        add_response = agol_helpers.add_features(SERVICE_URL, token, agol_payload)

        # write to csv
        knack_data = data_helpers.mills_to_unix(knack_data)
        knack_data = data_helpers.unix_to_iso(knack_data)
        file_name = '{}/{}.csv'.format(CSV_DESTINATION, DATASET_NAME)
        data_helpers.write_csv(knack_data, file_name=file_name)

        return add_response

    except Exception as e:
        print('Failed to process data for {}'.format(date_time))
        print(e)
        raise e
def main(date_time):
    print('starting stuff now')

    try:
        url = 'https://raw.githubusercontent.com/cityofaustin/transportation/gh-pages/components/data/quote_of_the_week.csv'

        # get new quote data from Knack database
        # this is where the user maintains the quotes
        for obj in KNACK_PARAMS['REFERENCE_OBJECTS']:
            # get field metadata
            fields = knack_helpers.get_all_fields(obj, KNACK_PARAMS)

            # assign field metadata to 'raw' field name
            field_list = {}

            for field in fields:
                field_list[field['key'] + '_raw'] = field

            # update knack params with list of all field names
            KNACK_PARAMS['FIELD_NAMES'] = knack_helpers.create_label_list(fields)

            # get knack object data
            data = knack_helpers.get_object_data(obj, KNACK_PARAMS)

            # parse data
            data = knack_helpers.parse_data(data, field_list, KNACK_PARAMS, convert_to_unix=True)

        # prepare dates for the internet
        data = data_helpers.unix_to_mills(data)

        payload = data_helpers.write_csv(data, in_memory=True)

        git_auth = github_helpers.create_auth_tuple(secrets.GITHUB_CREDENTIALS)

        repo_data = github_helpers.get_file(GIT_PARAMS['REPO_URL'], GIT_PARAMS['PATH'], 'gh-pages', git_auth)

        GIT_PARAMS['sha'] = repo_data['sha']

        git_response = github_helpers.commit_file(GIT_PARAMS['REPO_URL'], GIT_PARAMS['PATH'], GIT_PARAMS['BRANCH'], payload, 'update_quote_of_week', GIT_PARAMS['sha'], git_auth, existing_file=repo_data)

        return git_response

    except Exception as e:
        print('Failed to process data for {}'.format(date_time))
        print(e)
        raise e
def main(date_time):
    print('starting stuff now')

    try:
        # get and parse current data from Knack
        field_list = knack_helpers.get_fields(KNACK_PARAMS)
        knack_data = knack_helpers.get_data(KNACK_PARAMS)
        knack_data = knack_helpers.parse_data(knack_data, field_list, KNACK_PARAMS, require_locations=True, convert_to_unix=True)
        knack_data = data_helpers.stringify_key_values(knack_data)
        knack_data_mills = data_helpers.unix_to_mills(deepcopy(knack_data))

        # token = agol_helpers.get_token(secrets.AGOL_CREDENTIALS)
        # agol_payload = agol_helpers.build_payload(knack_data_mills)
        # del_response = agol_helpers.delete_features(SERVICE_URL, token)
        # add_response = agol_helpers.add_features(SERVICE_URL, token, agol_payload)

        # get the currently published dataset from Socrata
        socrata_data = socrata_helpers.get_private_data(secrets.SOCRATA_CREDENTIALS, SOCRATA_RESOURCE_ID)
        socrata_data = data_helpers.upper_case_keys(socrata_data)
        socrata_data = data_helpers.stringify_key_values(socrata_data)
        socrata_data = data_helpers.iso_to_unix(socrata_data, replace_tz=True)

        # detect new, changed, and deleted records
        cd_results = data_helpers.detect_changes(socrata_data, knack_data, PRIMARY_KEY, keys=KNACK_PARAMS['FIELD_NAMES'] + ['LATITUDE', 'LONGITUDE'])

        if cd_results['new'] or cd_results['change'] or cd_results['delete']:
            socrata_payload = socrata_helpers.create_payload(cd_results, PRIMARY_KEY)
            socrata_payload = socrata_helpers.create_location_fields(socrata_payload)

        else:
            socrata_payload = []

        socrata_payload = data_helpers.lower_case_keys(socrata_payload)
        socrata_payload = data_helpers.unix_to_iso(socrata_payload)

        # upsert changes to Socrata and alert on errors
        upsert_response = socrata_helpers.upsert_data(secrets.SOCRATA_CREDENTIALS, socrata_payload, SOCRATA_RESOURCE_ID)

        if 'error' in upsert_response:
            email_helpers.send_socrata_alert(secrets.ALERTS_DISTRIBUTION, SOCRATA_RESOURCE_ID, upsert_response)

        elif upsert_response['Errors']:
            email_helpers.send_socrata_alert(secrets.ALERTS_DISTRIBUTION, SOCRATA_RESOURCE_ID, upsert_response)

        # log the publication
        log_payload = socrata_helpers.prep_pub_log(date_time, 'travel_sensor_update', upsert_response)

        pub_log_response = socrata_helpers.upsert_data(secrets.SOCRATA_CREDENTIALS, log_payload, SOCRATA_PUB_LOG_ID)

        # write to csv
        knack_data = data_helpers.unix_to_iso(knack_data)
        file_name = '{}/{}.csv'.format(CSV_DESTINATION, DATASET_NAME)
        data_helpers.write_csv(knack_data, file_name=file_name)

        return log_payload

    except Exception as e:
        print('Failed to process data for {}'.format(date_time))
        print(e)
        raise e
import numpy as np
from sklearn.cluster import KMeans, DBSCAN

# word2count, word2vec, clusteringType, wordvecType, and category are assumed
# to be defined earlier in the script

# collect a vector for each word that has a valid embedding
X = []
words = []
cnt = 0

for word in word2count:
    try:
        wordVec = word2vec[word]
        if not np.isnan(wordVec).any():
            X.append(wordVec)
            words.append(word)
    except Exception:
        continue

# cluster the word vectors
if clusteringType == "KMeans":
    classifier = KMeans(n_clusters=10)
elif clusteringType == "DBSCAN":
    classifier = DBSCAN(min_samples=100)
else:
    raise Exception("Invalid clusteringType")

predictions = classifier.fit_predict(X)

# map each word to its cluster and write the results
word2cluster = dict()
rows = [["Word", "Cluster"]]

for i, word in enumerate(words):
    clusterNum = predictions[i]
    word2cluster[word] = clusterNum
    rows.append([word, clusterNum])

filename = "data/" + wordvecType + "-" + clusteringType + "-" + category + "_word2cluster.csv"
data_helpers.write_csv(filename, rows)
data_helpers.write_csv_to_xlsx(filename)
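# The clustering snippet above relies on inputs built elsewhere in the script.
# A hedged sketch of what those inputs might look like; every name and value
# below is invented for illustration only.
import numpy as np

word2count = {"transit": 42, "bike": 17}                   # word -> frequency
word2vec = {w: np.random.rand(300) for w in word2count}    # word -> embedding vector
clusteringType = "KMeans"        # or "DBSCAN"
wordvecType = "word2vec"         # used only to build the output filename
category = "mobility"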