def makeDataDict(filenames):
    """
    Returns a dictionary having the combined data of all the CSV files

    ARGS:
        filenames{list} -- a list containing the names of all the files,
            looked up under 'Data/csvfiles/'; each name may be ``bytes``
            (decoded as UTF-8) or ``str``

    RETURNS:
        -- a defaultdict(list) holding, under 'aftershocksyn' and under every
           name in the module-level ``columns``, the values of all the files
           appended in the order the files were given
    """
    dictionary = defaultdict(list)
    for i, filename in enumerate(filenames):
        # Directory listings yield bytes or str depending on how they were
        # requested; decode once here so both kinds of name are accepted.
        if isinstance(filename, (bytes, bytearray)):
            name = filename.decode('utf-8')
        else:
            name = str(filename)
        print('{}. working with {}, please wait...'.format(i, name))

        # calling csv2dict(), to convert the csv file to dictionary
        data = helper_functions.csv2dict('Data/csvfiles/' + name)

        # the target column is converted to doubles and stored as plain
        # Python floats
        grid_aftershock_count = np.double(data['aftershocksyn'])
        dictionary['aftershocksyn'].extend(grid_aftershock_count.tolist())

        # now adding remaining columns
        for column in columns:
            dictionary[column].extend(np.double(data[column]))
    return dictionary
def singleshot():
    """
    Scores the trained model on one randomly picked CSV file

    Picks 'rt-data-io/<k>.csv' with k drawn from 1..25, stores its stress
    columns and the 'aftershocksyn' column in 'single.h5', loads the model
    weights from 'Data/weights.h5', predicts on the stored data and writes
    to 'Data/singleCSV/singlePred.h5'.

    RETURNS:
        -- the value of sklearn.metrics.roc_auc_score(y, y_pred)
    """
    stress_columns = [
        'stresses_full_xx', 'stresses_full_yy', 'stresses_full_xy',
        'stresses_full_xz', 'stresses_full_yz', 'stresses_full_zz'
    ]
    label_column = 'aftershocksyn'
    weights_path = 'Data/weights.h5'
    prediction_path = 'Data/singleCSV/singlePred.h5'
    hdf_path = 'single.h5'

    # choose the input file at random
    csv_path = ''.join(('rt-data-io/', str(random.randint(1, 25)), '.csv'))
    print('working with {},...'.format(csv_path.rsplit('/', 1)[-1]))

    # collect the label first, then every stress column, as doubles
    raw = helper_functions.csv2dict(csv_path)
    collected = defaultdict(list)
    collected[label_column].extend(np.double(raw[label_column]).tolist())
    for name in stress_columns:
        collected[name].extend(np.double(raw[name]))

    # the HDF file gets the stress columns followed by the label column
    helper_functions.dict2HDF(hdf_path, stress_columns + [label_column],
                              collected)

    network = helper_functions.createModel()
    network.load_weights(weights_path)

    X, y = helper_functions.loadDataFromHDF(hdf_path, list(stress_columns),
                                            label_column)
    y_pred = network.predict(X)

    # NOTE(review): the file is named as a prediction file but receives y,
    # not y_pred -- confirm against writeHDF that this is intended.
    helper_functions.writeHDF(prediction_path, X, y)

    return sklearn.metrics.roc_auc_score(y, y_pred)
import csv ##input a file name of csv format from the singleCsv folder in the Data directory. filename = 'rt-data-io/incoming.csv' weightFile = 'Data/weights.h5' predFile = 'Data/singleCSV/singlePred.h5' columns = ['stresses_full_xx', 'stresses_full_yy', 'stresses_full_xy', 'stresses_full_xz', 'stresses_full_yz','stresses_full_zz'] testFile = 'single.h5' dictionary = defaultdict(list) print('working with {},...'.format(filename.split('/')[-1])) data = helper_functions.csv2dict(filename) grid_aftershock_count = np.double(data['aftershocksyn']) temp = grid_aftershock_count.tolist() dictionary['aftershocksyn'].extend(temp) for column in columns: dictionary[column].extend(np.double(data[column])) columns.append('aftershocksyn') helper_functions.dict2HDF('single.h5', columns, dictionary) features_in = ['stresses_full_xx', 'stresses_full_yy', 'stresses_full_xy', 'stresses_full_xz', 'stresses_full_yz', 'stresses_full_zz']
if values2['type'] == "crystal": space = space + 0.25 if test_atom not in values["connected"]: dist = hf.distance_checker(values2["coor"], xyz_list[-1]) if dist < space + 0.25: break atom_dict = {**atom_dict, **temp_atom_dict} break return atom_dict if __name__ == "__main__": # Having a global dict with bonding lengths improves speed a lot global bond_len_dict bond_len_dict = hf.csv2dict("bonding_distances.csv") build = hf.y2true( input("Create new crystal (y) or use existing file (n)?: ")) if build: a = float(input("Specify lattice constant (in Ångström): ")) atom_a = input("Element for first element type: ") atom_b = input("Element for second element type: ") diameter = float(input("Diameter of quantum dot (in unit cells): ")) atom_dict = crystal_builder(a, atom_a, atom_b, diameter) else: crystal_file = input( "Crystal file to use (don't write the file extension): ") + ".xyz" atom_dict = crystal_reader(crystal_file) foldername = input("Save in folder (or main): ") if foldername == "main":