def preprocessing(self):
    # step 1: preprocessing
    if np.DataSource().exists("feat.npy") and np.DataSource().exists("label.npy"):
        features, labels = np.load('feat.npy'), np.load('label.npy')
    else:
        features, labels = feature_extraction(self.train_dir)
        np.save('feat.npy', features)
        np.save('label.npy', labels)
    print(features, labels)
    return features, labels
def from_homebuilt_APDscan_ASCII_triplet(partialfilepath, name=None, parent=None):
    ends = ["_A_set.txt", "_B_Mtr.txt", "_C_Mrt.txt"]
    for end in ends:
        partialfilepath = _strip_end(partialfilepath, end)
    filestrs = [os.fspath(partialfilepath + end) for end in ends]
    fs = [np.DataSource(None).open(filestr, "rb") for filestr in filestrs]
    # first harvest some metadata from the A_set file
    arr = np.loadtxt(fs[0], max_rows=1, dtype=int)
    x0, y0, extent, pixels = arr[0], arr[1], arr[2], arr[3]
    x = np.linspace(x0, extent + x0, pixels)
    y = np.linspace(y0, extent + y0, pixels)
    # grab trace and retrace data
    trace = np.genfromtxt(fs[1], unpack=True)
    retrace = np.genfromtxt(fs[2], unpack=True)
    # parse name
    if name is None:
        name = pathlib.Path(partialfilepath).stem
    # create data
    kwargs = {"name": name, "kind": "APDscan", "source": filestrs[1]}
    if parent is None:
        data = wt.Data(**kwargs)
    else:
        data = parent.create_data(**kwargs)
    data.create_variable("x", values=x[:, None], units="um")
    data.create_variable("y", values=y[None, :], units="um")
    data.create_channel("trace", values=trace)
    data.create_channel("retrace", values=retrace)
    data.transform("x", "y")
    for f in fs:
        f.close()
    return data
def get_sample_data(n_sess, full_brain=False, subj=1):
    """Download the data for the requested session and subject.

    Parameters
    ----------
    n_sess: int
        number of session, one of {0, 1, 2, 3, 4}
    full_brain: bool
        if True, download the full-brain dataset
    subj: int
        number of subject, one of {1, 2}
    """
    DIR = tempfile.mkdtemp()
    ds = np.DataSource(DIR)
    BASEDIR = 'http://fa.bianp.net/projects/hrf_estimation/data'
    BASEDIR_COMMON = BASEDIR + '/data_common/'
    if full_brain:
        BASEDIR += '/full_brain'
    BASEDIR_SUBJ = BASEDIR + '/data_subj%s/' % subj

    event_matrix = io.mmread(ds.open(BASEDIR_COMMON + 'event_matrix.mtx')).toarray()
    print('Downloading BOLD signal')
    voxels = np.load(ds.open(BASEDIR_SUBJ + 'voxels_%s.npy' % n_sess))
    # print('Downloading Scatting Stim')
    # scatt_stim = np.load(ds.open(
    #     BASEDIR_SUBJ + 'scatt_stim_%s.npy' % n_sess))

    em = sparse.coo_matrix(event_matrix)
    fir_matrix = utils.convolve_events(event_matrix, np.eye(HRF_LENGTH))
    events_train = sparse.block_diag([event_matrix] * 5).toarray()
    conditions_train = sparse.coo_matrix(events_train).col
    onsets_train = sparse.coo_matrix(events_train).row
    return voxels, conditions_train, onsets_train
def numpy(self):
    """
    :return: array (numpy void)

    Returns the data from the query in the form of a numpy void array.
    """
    return numpy.load(
        numpy.DataSource(None).open(self.makeurl('npy'), 'rb'))
def GD(max_iter, tol, eta):
    iterations = 0
    F = numpy.array([])      # stores the values of the function
    Epoch = numpy.array([])
    while iterations < max_iter:
        if iterations == 0:
            # Generate initial guess and book keeping
            if numpy.DataSource().exists('InitialGuess.txt'):
                W = numpy.loadtxt('InitialGuess.txt')   # load the initial weights
            else:
                W = Initalisation()
                numpy.savetxt('InitialGuess.txt', W)    # generate the weights and save them
            W = numpy.reshape(W, (1, 2))
            f_temp = Eval_Func(W[-1, 0], W[-1, 1])
            F = numpy.concatenate((F, [f_temp]), axis=0)
            Epoch = numpy.concatenate((Epoch, [iterations]), axis=0)
            print('No. of Iterations: ', iterations, ' Points: ', W[-1], ' Function Value: ', F[-1], '\n')
            iterations += 1
        else:
            # Run the gradient descent update
            w_temp = Update_Weights(W[-1, 0], W[-1, 1], eta)
            f_temp = Eval_Func(w_temp[0], w_temp[1])  # evaluate the function at the updated point
            # Book keeping
            W = numpy.concatenate((W, [w_temp]), axis=0)
            F = numpy.concatenate((F, [f_temp]), axis=0)
            Epoch = numpy.concatenate((Epoch, [iterations]), axis=0)
            print('No. of Iterations: ', iterations, ' Points: ', W[-1], ' Function Value: ', F[-1], '\n')
            # Stop once successive weights are element-wise within tolerance
            if numpy.all(numpy.abs(W[-1] - W[-2]) < tol):
                print('Optimal Value Reached')
                break
            else:
                iterations += 1
    return Epoch, W, F
def read_atsp(url):
    '''
    Motivation: for some reason, the .atsp file has 212 rows (not 53!),
    so it needs to be processed manually.
    The function takes a url (string) and returns a numpy array of edge costs.
    '''
    # Read the external file with numpy,
    # specify the number of lines to skip,
    # and initialize a list to write to:
    ds = np.DataSource()
    input_f = ds.open(url)
    num_lines_skip = 6
    lst = []
    # Append each line to the list, skipping the header and the 'EOF' marker:
    for i, line in enumerate(input_f):
        if (i <= num_lines_skip) or ('EOF' in line):
            continue
        else:
            lst.append(line)
    # Concatenate the read lines into one string,
    # removing all carriage returns and trailing whitespace:
    lst = ''.join(lst).replace('\n', '').split()
    # Return a numpy array reshaped into a 53 x 53 matrix:
    return np.array(lst, dtype=int).reshape(53, 53)
def read_bin(fname, fdir, fnum, minfo, numtype=np.longdouble, getfilename=True):
    '''Read in a floating point array'''
    filename = fdir + fname + '_' + fnum + '.mesh'
    datas = np.DataSource()
    read_ok = datas.exists(filename)
    my_dtype = set_dtype(minfo.contents['endian'], minfo.contents['AcRealSize'],
                         print_type=getfilename)
    if read_ok:
        if getfilename:
            print(filename)
        array = np.fromfile(filename, dtype=my_dtype)
        timestamp = array[0]
        array = np.reshape(array[1:],
                           (minfo.contents['AC_mx'],
                            minfo.contents['AC_my'],
                            minfo.contents['AC_mz']),
                           order='F')
    else:
        array = None
        timestamp = None
    return array, timestamp, read_ok
def __init__(self):
    self.d = int(ceil(e / EPSILON))
    self.w = int(ceil(log1p(1 / HIPOTHESIS_P)))
    date = lastWeekday(dt.datetime.now())
    if not np.DataSource().exists(ATM_MODEL.format(str(date.day))):
        if dt.datetime.now().hour >= RELOAD_TIME:
            self.fname = ATM_MODEL.format(str(date.day))
        else:
            self.fname = ATM_MODEL.format(str(lastWeekday(date - dt.timedelta(days=1))))
        np.save(self.fname, np.array([[0 for i in range(self.w)] for j in range(self.d)]))
def from_BrunoldrRaman(filepath, name=None, parent=None, verbose=True) -> Data:
    """Create a data object from the Brunold rRaman instrument.

    Expects one energy (in wavenumbers) and one counts value.

    Parameters
    ----------
    filepath : path-like
        Path to .txt file.
        Can be either a local or remote file (http/ftp).
        Can be compressed with gz/bz2, decompression based on file name.
    name : string (optional)
        Name to give to the created data object. If None, filename is used.
        Default is None.
    parent : WrightTools.Collection (optional)
        Collection to place new data object within. Default is None.
    verbose : boolean (optional)
        Toggle talkback. Default is True.

    Returns
    -------
    data
        New data object(s).
    """
    # parse filepath
    filestr = os.fspath(filepath)
    filepath = pathlib.Path(filepath)
    if ".txt" not in filepath.suffixes:
        wt_exceptions.WrongFileTypeWarning.warn(filepath, ".txt")
    # parse name
    if not name:
        name = filepath.name.split(".")[0]
    # create data
    kwargs = {"name": name, "kind": "BrunoldrRaman", "source": filestr}
    if parent is None:
        data = Data(**kwargs)
    else:
        data = parent.create_data(**kwargs)
    # array
    ds = np.DataSource(None)
    f = ds.open(filestr, "rt")
    arr = np.genfromtxt(f, delimiter="\t").T
    f.close()
    # chew through all scans
    data.create_variable(name="energy", values=arr[0], units="wn")
    data.create_channel(name="signal", values=arr[1])
    data.transform("energy")
    # finish
    if verbose:
        print("data created at {0}".format(data.fullpath))
        print(" range: {0} to {1} (wn)".format(data.energy[0], data.energy[-1]))
        print(" size: {0}".format(data.size))
    return data
def learnImage(weight_path, model_path='model', load_weight=False):
    img_width, img_height = 50, 50
    channel = 3
    nb_train_samples = 58
    nb_validation_samples = 58
    epochs = 300
    batch_size = 14
    train_data_dir = 'data/train'            # database for learning
    validation_data_dir = 'data/validation'  # database for testing

    if K.image_data_format() == 'channels_first':
        input_shape = (channel, img_width, img_height)
    else:
        input_shape = (img_width, img_height, channel)

    # model = Cnn(input_shape)
    model = Vgg16(input_shape)
    if load_weight and np.DataSource().exists(weight_path):
        model.load_weights(weight_path)
    model.compile(loss='binary_crossentropy',
                  optimizer=Adam(lr=1e-3),
                  metrics=['accuracy'])

    train_datagen = ImageDataGenerator(rescale=1. / 255,
                                       shear_range=0.2,
                                       zoom_range=0.2,
                                       horizontal_flip=True)
    test_datagen = ImageDataGenerator(rescale=1. / 255)
    train_generator = train_datagen.flow_from_directory(
        train_data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode='binary')
    validation_generator = test_datagen.flow_from_directory(
        validation_data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode='binary')

    model.fit_generator(train_generator,
                        steps_per_epoch=nb_train_samples // batch_size,
                        epochs=epochs,
                        validation_data=validation_generator,
                        validation_steps=nb_validation_samples // batch_size)
    model.save_weights(weight_path)
    model.save(model_path)
    print('save in ' + weight_path)
def from_ocean_optics(filepath, name=None, *, parent=None, verbose=True) -> Data:
    """Create a data object from an Ocean Optics brand spectrometer.

    Parameters
    ----------
    filepath : path-like
        Path to an ocean optics output file.
        Can be either a local or remote file (http/ftp).
        Can be compressed with gz/bz2, decompression based on file name.
    name : string (optional)
        Name to give to the created data object. If None, filename is used.
        Default is None.
    parent : WrightTools.Collection (optional)
        Collection to place new data object within. Default is None.
    verbose : boolean (optional)
        Toggle talkback. Default is True.

    Returns
    -------
    data
        New data object.
    """
    # parse filepath
    filestr = os.fspath(filepath)
    filepath = pathlib.Path(filepath)
    if ".scope" not in filepath.suffixes:
        wt_exceptions.WrongFileTypeWarning.warn(filepath, ".scope")
    # parse name
    if not name:
        name = filepath.name.split(".")[0]
    # create data
    kwargs = {"name": name, "kind": "Ocean Optics", "source": filestr}
    if parent is None:
        data = Data(**kwargs)
    else:
        data = parent.create_data(**kwargs)
    # array
    skip_header = 14
    skip_footer = 1
    ds = np.DataSource(None)
    f = ds.open(filestr, "rt")
    arr = np.genfromtxt(f, skip_header=skip_header, skip_footer=skip_footer,
                        delimiter="\t").T
    f.close()
    # construct data
    data.create_variable(name="energy", values=arr[0], units="nm")
    data.create_channel(name="signal", values=arr[1])
    data.transform("energy")
    # finish
    if verbose:
        print("data created at {0}".format(data.fullpath))
        print(" range: {0} to {1} (nm)".format(data.energy[0], data.energy[-1]))
        print(" size: {0}".format(data.size))
    return data
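# A minimal usage sketch for from_ocean_optics, assuming a local ".scope" file;
# the filename below is hypothetical and not part of the original source.
if os.path.exists("lamp_spectrum.scope"):
    _spec = from_ocean_optics("lamp_spectrum.scope", name="lamp")
    print(_spec.energy[0], _spec.energy[-1])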
def load_dataset():
    ds = np.DataSource()
    if ds.exists('trainX.npy') and ds.exists('trainY.npy') and ds.exists('testX.npy'):
        mylogger.info('Saved feature files exist; loading.')
        trainX = np.load('trainX.npy')
        trainY = np.load('trainY.npy')
        testX = np.load('testX.npy')
        return trainX, trainY, testX

    # train_ori_X = read_train_text_to_list('../data/trainData.txt')
    train_ori_Y = read_train_text_to_list('../data/trainLabel.txt')
    train_ori_Y = np.array([int(y) for y in train_ori_Y])
    # test_ori_X = read_train_text_to_list('../data/testData.txt')
    train_sentences = list(read_raw_documents('../data/trainData.txt'))
    test_sentences = list(
        read_raw_documents('../data/testData.txt', tokens_only=True))

    # Preprocessing & feature engineering options:
    # 1. Count Vectors as features
    # 2. TF-IDF Vectors as features
    # 3. Word Embeddings as features
    # 4. Text/NLP based features
    # 5. Topic Models as features
    vector_size = 50
    model = gensim.models.doc2vec.Doc2Vec(vector_size=vector_size,
                                          min_count=2,
                                          epochs=40)
    model.build_vocab(train_sentences)
    model.train(train_sentences,
                total_examples=model.corpus_count,
                epochs=model.epochs)

    n_train_samples = len(train_sentences)
    n_test_samples = len(test_sentences)
    train_X = np.zeros((n_train_samples, vector_size))
    test_X = np.zeros((n_test_samples, vector_size))
    for i in range(0, n_train_samples):
        train_X[i] = model.infer_vector(train_sentences[i][0])
    for i in range(0, n_test_samples):
        test_X[i] = model.infer_vector(test_sentences[i])
    train_Y = train_ori_Y

    np.save('trainX', train_X)
    np.save('trainY', train_Y)
    np.save('testX', test_X)
    return train_X, train_Y, test_X
def load_prior(cfg, verbose=False):
    core = cfg.core
    prior = cfg.prior
    nexp = core.nexp
    workdir = core.datadir_output

    begin_time = time()

    # Define the number of assimilation times
    recon_times = np.arange(core.recon_period[0], core.recon_period[1] + 1,
                            core.recon_timescale)
    ntimes, = recon_times.shape

    # prior
    if verbose:
        print('Source for prior: ', prior.prior_source)

    # Assign prior object according to "prior_source" (from namelist)
    X = LMR_prior.prior_assignment(prior.prior_source)
    X.prior_datadir = prior.datadir_prior
    X.prior_datafile = prior.datafile_prior
    X.statevars = prior.state_variables
    X.statevars_info = prior.state_variables_info
    X.Nens = core.nens
    X.anom_reference = prior.anom_reference
    X.detrend = prior.detrend
    X.avgInterval = prior.avgInterval

    # Read data file & populate initial prior ensemble
    X.populate_ensemble(prior.prior_source, prior)
    Xb_one_full = X.ens

    # Prepare to check for files in the prior (work) directory
    # (this object just points to a directory)
    prior_check = np.DataSource(workdir)

    # this is a hack that skips over the regridding option
    X.trunc_state_info = X.full_state_info
    Xb_one = Xb_one_full
    Xb_one_coords = X.coords
    [Nx, _] = Xb_one.shape

    # Keep dimension of pre-augmented version of state vector
    [state_dim, _] = Xb_one.shape

    if verbose:
        elapsed_time = time() - begin_time
        print('-----------------------------------------------------')
        print('completed in ' + str(elapsed_time) + ' seconds')
        print('-----------------------------------------------------')

    return X, Xb_one
def get_eops():
    """
    Download the Earth Orientation Parameters (EOPs) from the IERS
    and return them as a pandas DataFrame;
    https://datacenter.iers.org/eop.php
    """
    url = 'ftp://hpiers.obspm.fr/iers/eop/eopc04/eopc04_IAU2000.62-now'
    ds = np.DataSource()  # cache the downloaded file in the working directory
    file = ds.open(url)
    array = np.genfromtxt(file, skip_header=14)
    headers = [
        'Year', 'Month', 'Day', 'MJD', 'x', 'y', 'UT1-UTC', 'LOD', 'dX', 'dY',
        'x Err', 'y Err', 'UT1-UTC Err', 'LOD Err', 'dX Err', 'dY Err'
    ]
    eop = pd.DataFrame(data=array, index=array[:, 3], columns=headers)
    return eop
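# A brief usage sketch for get_eops; guarded so the IERS download only runs when
# the snippet is executed as a script. The columns printed are the headers
# defined inside get_eops above.
if __name__ == "__main__":
    eop = get_eops()
    print(eop[['MJD', 'UT1-UTC', 'LOD']].tail())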
def read(self, train, batch_size=1, num_samples=1, num_epochs=1):
    if train and np.DataSource().exists(self.tfrecord_name + "_train" + ".tfrecord"):
        tfrecord = self.tfrecord_name + "_train" + ".tfrecord"
    else:
        tfrecord = self.tfrecord_name + "_test" + ".tfrecord"

    dataset = tf.data.TFRecordDataset([self.load_path + tfrecord])
    dataset = dataset.map(self.decode)
    if train:
        dataset = dataset.shuffle(num_samples)
    dataset = dataset.repeat()
    dataset = dataset.batch(batch_size, drop_remainder=True)
    iterator = dataset.make_one_shot_iterator()
    return iterator.get_next()
def __init__(self):
    if not np.DataSource().exists(INBOUND_FILENAME):
        self._parse_raw_data()
    else:
        with open(INBOUND_FILENAME, 'rb') as inbound_file:
            self.inbound = pickle.load(inbound_file)
        with open(OUTBOUND_FILENAME, 'rb') as outbound_file:
            self.outbound = pickle.load(outbound_file)

    # Build the undirected dict
    self.undirect = dict()
    for key in self.inbound:
        self.undirect[key] = self.inbound[key]
        if key in self.outbound:
            self.undirect[key] |= self.outbound[key]
def psf_beads_analysis():
    temp_dir = path.abspath(TEST_DATA_DIR)
    file_name = '20191206_100xOil_A647_Cy3_FITC_DAPI_ri-1.512_na-1.4_100nm_561_002_SIR.npy'
    file_url = 'http://dev.mri.cnrs.fr/attachments/download/2295/psf_beads_EM-488_MAG-40.npy'
    try:
        data = np.load(path.join(temp_dir, file_name))
    except FileNotFoundError:
        # Local copy missing: fetch through np.DataSource into temp_dir and load
        # the cached file so that `data` is defined.
        repos = np.DataSource(temp_dir)
        with repos.open(file_url, 'rb') as f:
            data = np.load(f)

    analysis = psf_beads.PSFBeadsAnalysis()
    analysis.input.data = {'beads_image': data}
    analysis.set_metadata('theoretical_fwhm_lateral_res', 0.300)
    analysis.set_metadata('theoretical_fwhm_axial_res', 0.800)
    analysis.set_metadata('pixel_size', (.35, .06, .06))

    return analysis
def argolight_b():
    temp_dir = path.abspath(TEST_DATA_DIR)
    file_name = '201702_RI510_Argolight-1-1_010_SIR_ALX.npy'
    file_url = 'http://dev.mri.cnrs.fr/attachments/download/2290/201702_RI510_Argolight-1-1_010_SIR_ALX.npy'
    try:
        data = np.load(path.join(temp_dir, file_name))
    except FileNotFoundError as e:
        repos = np.DataSource(temp_dir)
        repos.open(file_url)
        raise Exception from e

    analysis = argolight.ArgolightBAnalysis()
    analysis.input.data = {'argolight_b': data}
    analysis.set_metadata('spots_distance', 5)
    analysis.set_metadata('pixel_size', (.125, .39, .39))

    return analysis
def psf_beads_analysis():
    temp_dir = path.abspath(TEST_DATA_DIR)
    file_name = 'psf_beads_EM-488_MAG-40.npy'
    file_url = ''
    try:
        data = np.load(path.join(temp_dir, file_name))
    except FileNotFoundError as e:
        repos = np.DataSource(temp_dir)
        repos.open(file_url)

    analysis = psf_beads.PSFBeadsAnalysis()
    analysis.input.data = {'beads_image': data}
    analysis.set_metadata('theoretical_fwhm_lateral_res', 0.300)
    analysis.set_metadata('theoretical_fwhm_axial_res', 0.800)
    analysis.set_metadata('pixel_size', (.35, .06, .06))

    return analysis
def __init__(self):
    # Load the similarity array
    array_url = 'https://s3-us-west-1.amazonaws.com/materialsintelligence/matminer_array.npy'
    ds = np.DataSource()
    ds.open(array_url)
    self.matminer_array = np.load(ds.abspath(array_url))

    # Other data
    mat2index_url = 'https://s3-us-west-1.amazonaws.com/materialsintelligence/mat2index.p'
    index2mat_url = 'https://s3-us-west-1.amazonaws.com/materialsintelligence/index2mat.p'
    scaler_url = 'https://s3-us-west-1.amazonaws.com/materialsintelligence/scaler.p'
    self.mat2index = pickle.load(ds.open(mat2index_url, 'rb'))
    self.index2mat = pickle.load(ds.open(index2mat_url, 'rb'))
    self.scaler = pickle.load(ds.open(scaler_url, 'rb'))

    # Mat parser
    self.parser = SimpleParser()
def get_data(d):
    """Return the train or test set."""
    data = np.DataSource()
    if d == 'train':
        out = data.open('http://archive.ics.uci.edu/ml/'
                        'machine-learning-databases/'
                        'poker/poker-hand-training-true.data')
    elif d == 'test':
        out = data.open('http://archive.ics.uci.edu/ml/'
                        'machine-learning-databases/'
                        'poker/poker-hand-testing.data')
    else:
        raise ValueError("Not a valid data option.")
    X = np.loadtxt(out, delimiter=",")
    y = X[:, -1]
    X = X[:, :-1]
    return X, y
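# A short usage sketch; both dataset names below come from get_data itself, and
# the guard keeps the UCI download from running on import.
if __name__ == "__main__":
    X_train, y_train = get_data('train')
    X_test, y_test = get_data('test')
    print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)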
def readFile(fileName):
    deviceList = []
    numOfDevices = 0

    # Check if the file exists in this directory or in the src/ directory
    if np.DataSource().exists(fileName):
        readfile = open(fileName, "r")
    else:
        readfile = open('src/' + fileName, "r")

    for line in readfile:
        # Remove leading and trailing spaces
        line = line.strip()
        # Skip commented-out lines and lines too short to hold three fields
        if not line.startswith('#') and not line.startswith('//') and len(line) > 2:
            line = line.strip("\r\n")
            line = line.split(" ")
            deviceList.append([line[0], line[1], line[2]])
            numOfDevices += 1

    return numOfDevices, deviceList
def open(filepath, edit_local=False):
    """Open any wt5 file, returning the top-level object (data or collection).

    Parameters
    ----------
    filepath : path-like
        Path to file.
        Can be either a local or remote file (http/ftp).
        Can be compressed with gz/bz2, decompression based on file name.
    edit_local : boolean (optional)
        If True, the file itself will be opened for editing.
        Otherwise, a copy will be created. Default is False.

    Returns
    -------
    WrightTools Collection or Data
        Root-level object in file.
    """
    filepath = os.fspath(filepath)
    ds = np.DataSource(None)
    if edit_local is False:
        tf = tempfile.mkstemp(prefix="", suffix=".wt5")
        with _open(tf[1], "w+b") as tff:
            with ds.open(str(filepath), "rb") as f:
                tff.write(f.read())
        filepath = tf[1]
    f = h5py.File(filepath, "r")
    class_name = f["/"].attrs["class"]
    name = f["/"].attrs["name"]
    f.close()
    if class_name == "Data":
        obj = wt_data.Data(filepath=str(filepath), name=name, edit_local=True)
    elif class_name == "Collection":
        obj = wt_collection.Collection(filepath=str(filepath), name=name, edit_local=True)
    else:
        obj = wt_group.Group(filepath=str(filepath), name=name, edit_local=True)
    if edit_local is False:
        setattr(obj, "_tmpfile", tf)
        weakref.finalize(obj, obj.close)
    return obj
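# A minimal usage sketch for open(); "example.wt5" is a hypothetical filename.
# With edit_local left False, the object is backed by a temporary copy, so the
# file on disk is never modified.
if os.path.exists("example.wt5"):
    root = open("example.wt5")  # Data, Collection, or Group depending on the file
    print(root)
    root.close()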
def read_PETSc_vec(file):
    import numpy
    dsource = numpy.DataSource()
    # open file, omit header, read length, read values, close file
    try:
        f = open(file, "rb")
    except OSError:
        print("Unexpected error:", sys.exc_info()[0], file)
        raise
    numpy.fromfile(f, dtype=">i4", count=1)          # omit header
    nvec = numpy.fromfile(f, dtype=">i4", count=1)   # read length
    # load data and change it to little endian, important for np.dot
    v = numpy.fromfile(f, dtype=">f8", count=nvec[0]).astype('<f8')
    f.close()
    return v
def load_url(url, augment):
    repo = np.DataSource()
    file = repo.open(url)
    mat_data = loadmat(file.name)            # load the .mat file data
    X = np.moveaxis(mat_data['X'], -1, 0)    # get features from the dataset
    y = mat_data['y'].flatten() - 1          # get classes from the dataset

    # If the augment option is turned on, append the augmented images and their
    # classes to the original data and class sets
    if augment:
        dataList = []
        classList = []
        for img_element, class_element in zip(X, y):
            aug_images = img_augmenting(img_element)
            for aug in aug_images:
                dataList.append(aug)
                classList.append(class_element)
        X = np.vstack(dataList)
        y = np.vstack(classList).flatten()

    X = np.true_divide(X, 255.0)
    return X, y
def __init__(self):
    """
    The constructor for the Cluster Plot object

    :param entity_type: 'all' or 'materials'
    :param limit: number of most common entities to plot
    :param heatphrase: color according to similarity to this phrase
    :param wordphrases: filter to show only the specified phrases
    """
    ds = np.DataSource()
    # material_names_url = "https://s3-us-west-1.amazonaws.com/materialsintelligence/material_map_tsne_words.npy"
    material_coords_url = "https://s3-us-west-1.amazonaws.com/materialsintelligence/final_material_map_atl10_30_ee12_lr200.npy"
    # ds.open(material_names_url)
    ds.open(material_coords_url)

    self.ee = EmbeddingEngine()
    self.embs = self.ee.embeddings / self.ee.norm
    # materials_json = urlopen("https://s3-us-west-1.amazonaws.com/matstract/material_map_10_mentions.json")
    # materials_data = materials_json.read().decode("utf-8")
    # self.materials_tsne_data = json.loads(materials_data)["data"][0]
    # self.norm_matnames = [self.ee.dp.get_norm_formula(m) for m in self.materials_tsne_data["text"]]
    # self.matname2index = dict()
    # for i, label in enumerate(self.norm_matnames):
    #     self.matname2index[label] = i
    self.materials_tsne_data = np.load(ds.abspath(material_coords_url))

    formula_counts = dict()
    for formula in self.ee.formulas_full:
        formula_counts[formula] = 0
        for elem in self.ee.formulas_full[formula]:
            formula_counts[formula] += self.ee.formulas_full[formula][elem]

    mat_counts = sorted(formula_counts.items(), key=lambda x: x[1], reverse=True)
    mat_counts = [mat_count for mat_count in mat_counts if mat_count[1] >= 10]

    self.norm_matnames = [m[0] for m in mat_counts]
    self.matname2index = dict()
    for i, label in enumerate(self.norm_matnames):
        self.matname2index[label] = i
def read(filepath, *, encoding="utf-8"):
    """Read headers from given filepath.

    Parameters
    ----------
    filepath : path-like or iterable of strings
        Path to file or iterable of strings.
    encoding : str
        Encoding to use when opening the file.
        No effect if iterable of strings given.

    Returns
    -------
    collections.OrderedDict
        Dictionary containing header information.
    """
    headers = collections.OrderedDict()
    ds = np.DataSource(None)
    # The following code is adapted from the np.genfromtxt source
    try:
        if isinstance(filepath, os.PathLike):
            filepath = os.fspath(filepath)
        if isinstance(filepath, str):
            fhd = iter(ds.open(filepath, "rt", encoding=encoding))
        else:
            fhd = iter(filepath)
    except TypeError:
        raise TypeError(
            "filepath must be a path-like, list of strings, "
            "or generator. Got %s instead." % type(filepath)
        )
    for line in fhd:
        if line[0] == "#":
            split = re.split(": |:\t", line, maxsplit=1)
            key = split[0][2:]
            headers[key] = string2item(split[1])
        else:
            break  # all header lines are at the beginning
    return headers
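# A small sketch exercising the iterable-of-strings branch of read(); the header
# keys and values here are made up for illustration.
if __name__ == "__main__":
    example_lines = ["# name: scan01", "# points: 256", "0.0\t1.0"]
    print(read(example_lines))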
def process(self):
    while True:
        try:
            if np.DataSource().exists("model.h5"):
                from keras.models import load_model
                model = load_model('model.h5')
            else:
                model = train(features, labels, epochs=100)
                model.save('model.h5')
            # step 3: prediction. Walk every folder under test_dir to collect
            # file paths (change this in the predict function of the init file),
            # so the script keeps finding new files and running continuously,
            # giving a near real-time test.
            for root, dirs, files in os.walk(self.test_dir):
                for file in files:
                    filepath = os.path.join(root, file)
                    print(filepath)
                    pred = predict(model=model, data_path=filepath)
                    print_leaderboard(pred=pred, data_path=self.train_dir)
                    # option 1: delete the processed file (as done here)
                    # option 2: use os.path.getmtime() to check whether the folder was updated
                    os.remove(filepath)
        except KeyboardInterrupt:
            # exit with Cmd+C / Ctrl+C
            break
def Output(exportData, model, seed, result, noiseLevel, resultNoisy, filesPath,
           antStr, bioTap, selections, filename):
    # export csv of results
    # do not want to give students the clean data
    # data = {next_name: result[:, i] for i, next_name in enumerate(selections)}
    # df = pd.DataFrame.from_dict(data)
    # df.set_index('time', inplace=True)
    # df.to_csv(filesPath + "Results_Clean2.csv")
    data = {
        next_name: resultNoisy[:, i]
        for i, next_name in enumerate(selections)
    }
    df = pd.DataFrame.from_dict(data)
    df.set_index('time', inplace=True)
    df.to_csv(filesPath + filename + ".csv")

    # export csv file for importing into Biotapestry
    if bioTap != '':
        f2 = open(filesPath + "biotapestry.csv", 'w')
        f2.write(bioTap)
        f2.close()

    # export SBML model text
    if exportData:
        sbmlStr = model.getSBML()
        te.saveToFile(filesPath + 'OrigModel.xml', sbmlStr)

    # export Antimony model text
    if exportData:
        if np.DataSource().exists(filesPath + 'OrigAntimony.txt'):
            print('Warning: ' + filesPath + 'OrigAntimony.txt already exists! Preventing overwrite.')
        else:
            fh = open(filesPath + 'OrigAntimony.txt', 'w')
            fh.write(str(antStr))
            fh.close()

    print('\nData Saved!\n')
def main():
    trained_data = np.DataSource()
    if trained_data.exists('centroids.npz'):
        print("Read already trained data...\n")
        with np.load('centroids.npz') as data:
            centroids = data['centroids']
            labels = data['labels']
            classifier = NearestCentroidClassifier(centroids, labels)
            classifier.set_data(centroids, labels)
    else:
        print("Reading data...\n")
        images, labels = get_data('train-labels.idx1-ubyte', 'train-images.idx3-ubyte')
        classifier = NearestCentroidClassifier(images, labels)
        print("Train classifier...\n")
        classifier.train()

    test_images, test_labels = get_data('t10k-labels.idx1-ubyte', 't10k-images.idx3-ubyte')
    print("Classify test data...\n")
    classified_images = classifier.classify(test_images)

    right = 0
    print("Check results...\n")
    for i in range(len(classified_images)):
        if classified_images[i] == test_labels[i]:
            right = right + 1

    print(str(100.0 * right / len(classified_images)) +
          " percent of the test data were classified correctly.\n")
    print("\nFrom 10,000 digits, " + str(right) + " were classified correctly.")