def wake_up_omnisci():
    """Connect to OmniSci, waking the cloud instance if it is asleep.

    Reads write credentials from the module-level ``omnisci_keyfile``
    path and connects via ``wake_and_connect_to_mapd``.

    Returns:
        The connection object from ``wake_and_connect_to_mapd``.
        NOTE(review): sibling code treats the string 'RETRY' as a
        failure sentinel from this helper — callers should check it.
    """
    # get OmniSci credentials
    # (removed the dead `dfcreds = pd.DataFrame()` pre-assignment: the
    # value was immediately overwritten by get_credentials)
    dfcreds = get_credentials(omnisci_keyfile)
    # connect to OmniSci, allowing time for the instance to wake
    connection = wake_and_connect_to_mapd(dfcreds['write_key_name'],
                                          dfcreds['write_key_secret'],
                                          mapdhost, mapddbname)
    return connection
def main():
    """Load each configured CSV file into its OmniSci table.

    Connects to OmniSci with the write credentials from the module-level
    ``omnisci_keyfile``, then loads every entry of ``tables_and_files``
    via ``load_new_table_mapd``.  Prints a message and exits if the
    instance could not be woken.
    """
    # connect to MapD
    dfcreds = get_credentials(omnisci_keyfile)
    connection = wake_and_connect_to_mapd(dfcreds['write_key_name'],
                                          dfcreds['write_key_secret'],
                                          mapdhost, mapddbname)
    if connection == 'RETRY':
        print('could not wake OmniSci; exiting')
        return
    # loop through tables
    # (renamed the loop variable previously called `dt`, which shadowed
    # the common `datetime as dt` module alias used elsewhere in this
    # codebase; also dropped a stray trailing comma in the unpacking)
    for table, filename, timestamp_col, tformat, dropcols in tables_and_files:
        load_new_table_mapd(connection, table, filename, timestamp_col,
                            tformat, dropcols, mapdhost, mapduser)
    # disconnect MapD
    disconnect_mapd(connection)
def main():
    """Export each configured OmniSci table to its paired CSV file.

    Connects to OmniSci with write credentials, pulls each table named
    in the module-level ``tables_and_files``, sorts rows by
    ``repo``/``view_timestamp``, prints a 10-row preview, and writes the
    result to the paired file path.  Exits with a message if the
    instance could not be woken.
    """
    # connect to MapD
    dfcreds = get_credentials(omnisci_keyfile)
    connection = wake_and_connect_to_mapd(dfcreds['write_key_name'],
                                          dfcreds['write_key_secret'],
                                          mapdhost, mapddbname)
    if connection == 'RETRY':
        print('could not wake OmniSci; exiting')
        return
    # loop through tables
    for table, filename in tables_and_files:
        # (removed the dead `df = pd.DataFrame()` pre-assignment: the
        # value was immediately overwritten by get_table_mapd)
        df = get_table_mapd(connection, table)
        df.sort_values(['repo', 'view_timestamp'], inplace=True)
        print(df.head(10))
        df.to_csv(filename, index=False)
    # disconnect MapD
    disconnect_mapd(connection)
def main():
    """Collect GitHub stars, views, and referrers for each tracked repo
    and write each metric to its own CSV under ``file_path``.

    Authenticates with the token from the module-level ``keyfile`` and
    iterates the module-level ``repositories`` list (prefixed with
    ``gproject`` to form the full repo name).
    """
    # get the authentication information from the keyfile
    dfcreds = get_credentials(keyfile)
    # get the token from the authentication information
    auth_header = dfcreds['access_token']
    # instantiate a github object; authorize with the token
    g = Github(auth_header)
    # accumulate per-repo frames in lists and concatenate once at the
    # end: DataFrame.append was deprecated in pandas 1.4 and removed in
    # pandas 2.0, and repeated append is quadratic anyway
    star_frames = []
    view_frames = []
    referrer_frames = []
    # loop through the list of repos
    for repo in repositories:
        r = g.get_repo(gproject + repo)
        # stars
        star_frames.append(get_stars(r))
        # views
        view_frames.append(get_views(r))
        # referrers
        referrer_frames.append(get_referrers(r))
    # empty DataFrame fallback keeps behavior sane when `repositories`
    # is empty (pd.concat raises on an empty list)
    df_s_main = (pd.concat(star_frames, ignore_index=True)
                 if star_frames else pd.DataFrame())
    df_v_main = (pd.concat(view_frames, ignore_index=True)
                 if view_frames else pd.DataFrame())
    df_r_main = (pd.concat(referrer_frames, ignore_index=True)
                 if referrer_frames else pd.DataFrame())
    # write stars to file
    print("writing stars to file")
    df_s_main.to_csv(file_path + "oss_git_stars.csv", index=False)
    # write views to file
    print("writing views to file")
    df_v_main.to_csv(file_path + "oss_git_views.csv", index=False)
    # write referrers to file
    print("writing referrers to file")
    df_r_main.to_csv(file_path + "oss_git_referrers.csv", index=False)
def main():
    """Pull HigherLogic community-member and discussion-post data and
    write each dataset to a timestamped CSV under ``file_path``.

    Uses the HLIAMKey from the module-level ``keyfile`` for
    authentication and ``repo_path`` as the API base URL.
    """
    # get the authentication information from the keyfile
    dfcreds = get_credentials(keyfile)
    headers = {'content-type': 'application/json', 'HLIAMKey': dfcreds['key']}

    # Viewable Communities
    rViewableCommunities = requests.get(
        repo_path + 'api/v2.0/Communities/GetViewableCommunities',
        headers=headers)
    # NOTE(review): this parsed frame is never used below — confirm
    # whether the GET is needed at all or the result should be exported
    dfViewableCommunities = pd.read_json(rViewableCommunities.content)

    # Community Members
    payload = {
        "CommunityKey": 'd06df790-8ca4-4e54-91a0-244af0228ddc',
        "StartRecord": 1,
        "EndRecord": 1500
    }
    rCommunityMembers = requests.post(
        repo_path + 'api/v2.0/Communities/GetCommunityMembers',
        headers=headers,
        json=payload)
    dfCommunityMembers = pd.read_json(rCommunityMembers.content)
    # add a timestamp to the data
    dfCommunityMembers['cmtimestamp'] = dt.datetime.now()
    dfCommunityMembers.index.names = ['rowUID']
    # remove the nested dictionary of community information
    # (the positional `axis` argument to drop — `.drop('Community', 1)` —
    # was deprecated in pandas 1.0 and removed in pandas 2.0)
    dfCommunityMembers.drop(columns='Community', inplace=True)
    dfCommunityMembers.to_csv(file_path + 'techsup_hl_communitymembers.csv',
                              index=False,
                              date_format="%Y-%m-%d")

    # Discussion Posts
    rDiscussionPosts = requests.get(
        repo_path + 'api/v2.0/Discussions/GetDiscussionPosts?maxToRetrieve=5000',
        headers=headers)
    dfDiscussionPosts = pd.read_json(rDiscussionPosts.content)
    # add a timestamp to the data
    dfDiscussionPosts['dptimestamp'] = dt.datetime.now()
    dfDiscussionPosts.to_csv(file_path + 'techsup_hl_discussionposts.csv',
                             index=False,
                             date_format="%Y-%m-%d")
def main():
    """Refresh every configured OmniSci table from its source file.

    Each ``tables_and_files`` entry names its own update method: 'load'
    replaces the table, 'append' adds to it; anything else is reported
    as invalid.  Prints a message and exits if the instance could not
    be woken.
    """
    # connect to MapD
    dfcreds = get_credentials(omnisci_keyfile)
    connection = wake_and_connect_to_mapd(dfcreds['write_key_name'],
                                          dfcreds['write_key_secret'],
                                          mapdhost, mapddbname)
    if connection == 'RETRY':
        print('could not wake OmniSci; exiting')
        return
    # map each update method to its loader function
    loaders = {
        'load': load_new_table_mapd,
        'append': append_new_table_mapd,
    }
    # loop through tables
    for (table, file, timestamp_cols, timestamp_format,
         timestamp_units, integer_cols, update_method) in tables_and_files:
        loader = loaders.get(update_method)
        if loader is None:
            print('invalid or unspecified update method')
            continue
        loader(connection, table, file, timestamp_cols,
               timestamp_format, timestamp_units, integer_cols)
    # disconnect MapD
    disconnect_mapd(connection)
def main():
    """Append the rows of every configured CSV file to the OmniSci
    table named by the module-level ``table_name``.

    Each ``file_names`` entry carries the file path plus the column
    rename/recast specs consumed by ``parse_cols``.  Prints a message
    and exits if the instance could not be woken.
    """
    # connect to MapD
    dfcreds = get_credentials(omnisci_keyfile)
    connection = wake_and_connect_to_mapd(dfcreds['write_key_name'],
                                          dfcreds['write_key_secret'],
                                          mapdhost, mapddbname)
    if connection == 'RETRY':
        print('could not wake OmniSci; exiting')
        return
    for (csv_file, renamed_cols, int8_cols, int32_cols,
         ts_cols, tf, str_cols, bool_cols) in file_names:
        # get the contents of the file and turn them into a dataframe
        print("reading from file " + csv_file)
        dfnew = pd.read_csv(csv_file, index_col=False)
        # rename and recast columns (mutates dfnew in place)
        parse_cols(dfnew, renamed_cols, int8_cols, int32_cols,
                   ts_cols, tf, str_cols, bool_cols)
        # append the contents of this file to the existing table
        print("appending file contents to table " + table_name)
        connection.load_table(table_name, dfnew,
                              preserve_index=False,
                              create=False)
    # disconnect MapD
    disconnect_mapd(connection)