Example #1
def preprocessing(self):
    # step 1: preprocessing
    if np.DataSource().exists("feat.npy") and np.DataSource().exists("label.npy"):
        features, labels = np.load('feat.npy'), np.load('label.npy')
    else:
        features, labels = feature_extraction(self.train_dir)
        np.save('feat.npy', features)
        np.save('label.npy', labels)
    print(features, labels)

    return features, labels
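For reference, a minimal sketch (file names hypothetical) of the np.DataSource behaviour this caching pattern relies on: exists() works for local paths and also accepts http/ftp URLs, and open() caches remote downloads under the DataSource's destination directory.

import os
import numpy as np

ds = np.DataSource()   # destpath defaults to the current directory

# For local files this is equivalent to os.path.exists();
# DataSource.exists() additionally accepts http/ftp URLs (it probes whether they can be opened).
print(ds.exists("feat.npy"), os.path.exists("feat.npy"))

# open() would download a remote file into destpath and return a file object
# (hypothetical URL, left commented out so the sketch runs offline):
# with np.DataSource("cache_dir").open("http://example.com/feat.npy", "rb") as f:
#     features = np.load(f)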
Example #2
def from_homebuilt_APDscan_ASCII_triplet(partialfilepath,
                                         name=None,
                                         parent=None):
    ends = ["_A_set.txt", "_B_Mtr.txt", "_C_Mrt.txt"]
    for end in ends:
        partialfilepath = _strip_end(partialfilepath, end)
    filestrs = [os.fspath(partialfilepath + end) for end in ends]
    fs = [np.DataSource(None).open(filestr, "rb") for filestr in filestrs]
    # first harvest some metadata from A_set file
    arr = np.loadtxt(fs[0], max_rows=1, dtype=int)
    x0, y0, extent, pixels = arr[0], arr[1], arr[2], arr[3]
    x = np.linspace(x0, extent + x0, pixels)
    y = np.linspace(y0, extent + y0, pixels)
    # grab trace and retrace data
    trace = np.genfromtxt(fs[1], unpack=True)
    retrace = np.genfromtxt(fs[2], unpack=True)
    # parse name
    if name is None:
        name = pathlib.Path(partialfilepath).stem
    # create data
    kwargs = {"name": name, "kind": "APDscan", "source": filestrs[1]}
    if parent is None:
        data = wt.Data(**kwargs)
    else:
        data = parent.create_data(**kwargs)
    data.create_variable("x", values=x[:, None], units="um")
    data.create_variable("y", values=y[None, :], units="um")
    data.create_channel("trace", values=trace)
    data.create_channel("retrace", values=retrace)
    data.transform("x", "y")
    for f in fs:
        f.close()
    return data
Example #3
def get_sample_data(n_sess, full_brain=False, subj=1):
    """
    Download the data for the given session and subject.

    Parameters
    ----------
    n_sess: int
        session number, one of {0, 1, 2, 3, 4}
    full_brain: bool
        if True, fetch the full-brain dataset
    subj: int
        subject number, one of {1, 2}
    """
    DIR = tempfile.mkdtemp()
    ds = np.DataSource(DIR)
    BASEDIR = 'http://fa.bianp.net/projects/hrf_estimation/data'
    BASEDIR_COMMON = BASEDIR + '/data_common/'
    if full_brain:
        BASEDIR += '/full_brain'
    BASEDIR_SUBJ = BASEDIR + '/data_subj%s/' % subj
    event_matrix = io.mmread(ds.open(BASEDIR_COMMON +
                                     'event_matrix.mtx')).toarray()
    print('Downloading BOLD signal')
    voxels = np.load(ds.open(BASEDIR_SUBJ + 'voxels_%s.npy' % n_sess))
    # print('Downloading Scatting Stim')
    # scatt_stim = np.load(ds.open(
    #     BASEDIR_SUBJ + 'scatt_stim_%s.npy' % n_sess))

    em = sparse.coo_matrix(event_matrix)
    fir_matrix = utils.convolve_events(event_matrix, np.eye(HRF_LENGTH))
    events_train = sparse.block_diag([event_matrix] * 5).toarray()
    conditions_train = sparse.coo_matrix(events_train).col
    onsets_train = sparse.coo_matrix(events_train).row

    return voxels, conditions_train, onsets_train
Example #4
def numpy(self):
    """
    Returns the data from the query in the form of a numpy void array.

    :return: array (numpy void)
    """
    return numpy.load(
        numpy.DataSource(None).open(self.makeurl('npy'), 'rb'))
Example #5
def GD(max_iter, tol, eta):
    iterations = 0
    F = numpy.array([])  # Stores the values of the function
    Epoch = numpy.array([])
    while iterations < max_iter:
        if iterations == 0:
            # Generate Initial Guess and Book Keeping
            if numpy.DataSource().exists('InitialGuess.txt'):
                W = numpy.loadtxt('InitialGuess.txt')  # Load the Initial Weights
            else:
                W = Initalisation()
                numpy.savetxt('InitialGuess.txt', W)  # Generate the Weights and save them
            W = numpy.reshape(W, (1, 2))
            f_temp = Eval_Func(W[-1, 0], W[-1, 1])
            F = numpy.concatenate((F, [f_temp]), axis=0)
            Epoch = numpy.concatenate((Epoch, [iterations]), axis=0)
            print('No. of Iterations: ', iterations, ' Points: ', W[-1], ' Function Value: ', F[-1], '\n')
            iterations += 1
        else:
            # Run the gradient descent update
            w_temp = Update_Weights(W[-1, 0], W[-1, 1], eta)
            f_temp = Eval_Func(w_temp[0], w_temp[1])  # evaluate at the updated point
            # Book keeping
            W = numpy.concatenate((W, [w_temp]), axis=0)
            F = numpy.concatenate((F, [f_temp]), axis=0)
            Epoch = numpy.concatenate((Epoch, [iterations]), axis=0)
            print('No. of Iterations: ', iterations, ' Points: ', W[-1], ' Function Value: ', F[-1], '\n')
            # Stop when every component of the weight change is below the tolerance
            if numpy.all(numpy.abs(W[-1] - W[-2]) < tol):
                print('Optimal Value Reached')
                break
            else:
                iterations += 1
    return Epoch, W, F
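The stopping test above checks that every component of the step is below tol. A minimal, self-contained illustration (values made up) of why the elementwise form is used rather than collapsing the difference to a single boolean:

import numpy as np

w_new = np.array([0.501, 1.002])
w_old = np.array([0.500, 1.000])
tol = 1e-2

# Collapsed form: .all() reduces the difference to one bool, and "True < tol"
# is almost never what is intended.
collapsed = (w_new - w_old).all() < tol            # False, no matter how close the points are

# Elementwise form: every component of the step must be smaller than tol.
converged = np.all(np.abs(w_new - w_old) < tol)    # True here
print(collapsed, converged)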
Example #6
def read_atsp(url):
    
    '''
    Motivation: the .atsp file wraps the 53 x 53 cost matrix over 212 rows
    (instead of 53), so it has to be parsed manually.

    The function takes a url (string) and returns a numpy array
    of edge costs.
    '''
    
    # Reads the external file with numpy,
    # specifies the number of lines to skip,
    # initializes a list to write to:
    ds = np.DataSource()
    input_f = ds.open(url)
    num_lines_skip = 6
    lst = []
    
    # Appends each line to the list, skipping the header and the EOF marker:
    for i, line in enumerate(input_f):
    
        if (i <= num_lines_skip) or ('EOF' in line):
            continue

        else:
            lst.append(line)

    # Join the collected lines into one string,
    # remove newlines, and split on whitespace:
    lst = ''.join(lst).replace('\n', '').split()
    
    # Return numpy array, reshape it into 53 x 53 matrix:  
    return np.array(lst, dtype=int).reshape(53, 53)
Example #7
def read_bin(fname,
             fdir,
             fnum,
             minfo,
             numtype=np.longdouble,
             getfilename=True):
    '''Read in a floating point array'''
    filename = fdir + fname + '_' + fnum + '.mesh'
    datas = np.DataSource()
    read_ok = datas.exists(filename)

    my_dtype = set_dtype(minfo.contents['endian'],
                         minfo.contents['AcRealSize'],
                         print_type=getfilename)

    if read_ok:
        if getfilename:
            print(filename)
        array = np.fromfile(filename, dtype=my_dtype)

        timestamp = array[0]

        array = np.reshape(array[1:],
                           (minfo.contents['AC_mx'], minfo.contents['AC_my'],
                            minfo.contents['AC_mz']),
                           order='F')
    else:
        array = None
        timestamp = None

    return array, timestamp, read_ok
Example #8
def __init__(self):
    self.d = int(ceil(e/EPSILON))
    self.w = int(ceil(log1p(1/HIPOTHESIS_P)))
    date = lastWeekday(dt.datetime.now())
    if not np.DataSource().exists(ATM_MODEL.format(str(date.day))):
        if dt.datetime.now().hour >= RELOAD_TIME:
            self.fname = ATM_MODEL.format(str(date.day))
        else:
            self.fname = ATM_MODEL.format(str(lastWeekday(date - dt.timedelta(days=1)).day))
        # initialise the model as a d x w table of zeros and save it
        np.save(self.fname, np.array([[0 for i in range(self.w)] for j in range(self.d)]))
Example #9
def from_BrunoldrRaman(filepath, name=None, parent=None, verbose=True) -> Data:
    """Create a data object from the Brunold rRaman instrument.

    Expects one energy (in wavenumbers) and one counts value.

    Parameters
    ----------
    filepath : path-like
        Path to .txt file.
        Can be either a local or remote file (http/ftp).
        Can be compressed with gz/bz2, decompression based on file name.
    name : string (optional)
        Name to give to the created data object. If None, filename is used.
        Default is None.
    parent : WrightTools.Collection (optional)
        Collection to place new data object within. Default is None.
    verbose : boolean (optional)
        Toggle talkback. Default is True.

    Returns
    -------
    data
        New data object(s).
    """
    # parse filepath
    filestr = os.fspath(filepath)
    filepath = pathlib.Path(filepath)

    if ".txt" not in filepath.suffixes:
        wt_exceptions.WrongFileTypeWarning.warn(filepath, ".txt")
    # parse name
    if not name:
        name = filepath.name.split(".")[0]
    # create data
    kwargs = {"name": name, "kind": "BrunoldrRaman", "source": filestr}
    if parent is None:
        data = Data(**kwargs)
    else:
        data = parent.create_data(**kwargs)
    # array
    ds = np.DataSource(None)
    f = ds.open(filestr, "rt")
    arr = np.genfromtxt(f, delimiter="\t").T
    f.close()
    # chew through all scans
    data.create_variable(name="energy", values=arr[0], units="wn")
    data.create_channel(name="signal", values=arr[1])
    data.transform("energy")
    # finish
    if verbose:
        print("data created at {0}".format(data.fullpath))
        print("  range: {0} to {1} (wn)".format(data.energy[0],
                                                data.energy[-1]))
        print("  size: {0}".format(data.size))
    return data
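As the docstring notes, the input may be gz/bz2-compressed; np.DataSource chooses a decompressor from the file name, so the genfromtxt call is unchanged. A self-contained sketch (the two-point spectrum below is made up purely for illustration):

import gzip
import numpy as np

# write a tiny hypothetical compressed data file so the sketch is self-contained
with gzip.open("spectrum.txt.gz", "wt") as f:
    f.write("100.0\t5.0\n200.0\t7.5\n")

# DataSource(None) uses a temporary directory; the '.gz' suffix triggers gzip decompression
ds = np.DataSource(None)
with ds.open("spectrum.txt.gz", "rt") as f:
    arr = np.genfromtxt(f, delimiter="\t").T
print(arr[0], arr[1])  # first and second columns of the file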
Example #10
def learnImage(weight_path, model_path='model', load_weight=False):
    img_width, img_height = 50, 50
    channel = 3
    nb_train_samples = 58
    nb_validation_samples = 58
    epochs = 300
    batch_size = 14
    train_data_dir = 'data/train'  # Database for learning
    validation_data_dir = 'data/validation'  # Database for testing

    if K.image_data_format() == 'channels_first':
        input_shape = (channel, img_width, img_height)
    else:
        input_shape = (img_width, img_height, channel)


    # model = Cnn(input_shape)
    model = Vgg16(input_shape)

    if load_weight and np.DataSource().exists(weight_path):
        model.load_weights(weight_path)

    model.compile(loss='binary_crossentropy',
                  optimizer=Adam(lr=1e-3),
                  metrics=['accuracy'])

    train_datagen = ImageDataGenerator(rescale=1. / 255,
                                       shear_range=0.2,
                                       zoom_range=0.2,
                                       horizontal_flip=True)

    test_datagen = ImageDataGenerator(rescale=1. / 255)

    train_generator = train_datagen.flow_from_directory(
        train_data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode='binary')

    validation_generator = test_datagen.flow_from_directory(
        validation_data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode='binary')

    model.fit_generator(train_generator,
                        steps_per_epoch=nb_train_samples // batch_size,
                        epochs=epochs,
                        validation_data=validation_generator,
                        validation_steps=nb_validation_samples // batch_size)

    model.save_weights(weight_path)
    model.save(model_path)
    print('saved in ' + weight_path)
Example #11
def from_ocean_optics(filepath, name=None, *, parent=None, verbose=True) -> Data:
    """Create a data object from an Ocean Optics brand spectrometer.

    Parameters
    ----------
    filepath : path-like
        Path to an ocean optics output file.
        Can be either a local or remote file (http/ftp).
        Can be compressed with gz/bz2, decompression based on file name.
    name : string (optional)
        Name to give to the created data object. If None, filename is used.
        Default is None.
    parent : WrightTools.Collection (optional)
        Collection to place new data object within. Default is None.
    verbose : boolean (optional)
        Toggle talkback. Default is True.

    Returns
    -------
    data
        New data object.
    """
    # parse filepath
    filestr = os.fspath(filepath)
    filepath = pathlib.Path(filepath)

    if ".scope" not in filepath.suffixes:
        wt_exceptions.WrongFileTypeWarning.warn(filepath, ".scope")
    # parse name
    if not name:
        name = filepath.name.split(".")[0]
    # create data
    kwargs = {"name": name, "kind": "Ocean Optics", "source": filestr}
    if parent is None:
        data = Data(**kwargs)
    else:
        data = parent.create_data(**kwargs)
    # array
    skip_header = 14
    skip_footer = 1
    ds = np.DataSource(None)
    f = ds.open(filestr, "rt")
    arr = np.genfromtxt(f, skip_header=skip_header, skip_footer=skip_footer, delimiter="\t").T
    f.close()
    # construct data
    data.create_variable(name="energy", values=arr[0], units="nm")
    data.create_channel(name="signal", values=arr[1])
    data.transform("energy")
    # finish
    if verbose:
        print("data created at {0}".format(data.fullpath))
        print("  range: {0} to {1} (nm)".format(data.energy[0], data.energy[-1]))
        print("  size: {0}".format(data.size))
    return data
Example #12
def load_dataset():
    ds = np.DataSource()
    if ds.exists('trainX.npy') and ds.exists('trainY.npy') and ds.exists(
            'testX.npy'):
        mylogger.info('exist saved file. load.')
        trainX = np.load('trainX.npy')
        trainY = np.load('trainY.npy')
        testX = np.load('testX.npy')
        return trainX, trainY, testX
    # train_ori_X = read_train_text_to_list('../data/trainData.txt')
    train_ori_Y = read_train_text_to_list('../data/trainLabel.txt')
    train_ori_Y = np.array([int(y) for y in train_ori_Y])
    # test_ori_X = read_train_text_to_list('../data/testData.txt')

    train_sentences = list(read_raw_documents('../data/trainData.txt'))
    test_sentences = list(
        read_raw_documents('../data/testData.txt', tokens_only=True))

    # Data preprocessing & feature engineering options:
    # 1. Count Vectors as features
    # 2. TF-IDF Vectors as features
    # 3. Word Embeddings as features
    # 4. Text/NLP based features
    # 5. Topic Models as features

    vector_size = 50
    model = gensim.models.doc2vec.Doc2Vec(vector_size=vector_size,
                                          min_count=2,
                                          epochs=40)

    model.build_vocab(train_sentences)

    model.train(train_sentences,
                total_examples=model.corpus_count,
                epochs=model.epochs)

    n_train_samples = len(train_sentences)
    n_test_samples = len(test_sentences)
    vector_size = 50
    train_X = np.zeros((n_train_samples, vector_size))
    test_X = np.zeros((n_test_samples, vector_size))
    for i in range(0, n_train_samples):
        train_X[i] = model.infer_vector(train_sentences[i][0])
    for i in range(0, n_test_samples):
        test_X[i] = model.infer_vector(test_sentences[i])

    train_Y = train_ori_Y

    train_X.shape
    np.save('trainX', train_X)
    np.save('trainY', train_Y)
    np.save('testX', test_X)
    return train_X, train_Y, test_X
Example #13
def load_prior(cfg, verbose=False):

    core = cfg.core
    prior = cfg.prior
    nexp = core.nexp
    workdir = core.datadir_output

    begin_time = time()

    # Define the number of assimilation times
    recon_times = np.arange(core.recon_period[0], core.recon_period[1] + 1,
                            core.recon_timescale)
    ntimes, = recon_times.shape

    # prior
    if verbose: print('Source for prior: ', prior.prior_source)

    # Assign prior object according to "prior_source" (from namelist)
    X = LMR_prior.prior_assignment(prior.prior_source)
    X.prior_datadir = prior.datadir_prior
    X.prior_datafile = prior.datafile_prior
    X.statevars = prior.state_variables
    X.statevars_info = prior.state_variables_info
    X.Nens = core.nens
    X.anom_reference = prior.anom_reference
    X.detrend = prior.detrend
    X.avgInterval = prior.avgInterval

    # Read data file & populate initial prior ensemble
    X.populate_ensemble(prior.prior_source, prior)
    Xb_one_full = X.ens

    # Prepare to check for files in the prior (work) directory (this object just
    # points to a directory)
    prior_check = np.DataSource(workdir)

    # this is a hack that skips over regridding option
    X.trunc_state_info = X.full_state_info
    Xb_one = Xb_one_full
    Xb_one_coords = X.coords
    [Nx, _] = Xb_one.shape

    # Keep dimension of pre-augmented version of state vector
    [state_dim, _] = Xb_one.shape

    if verbose:
        elapsed_time = time() - begin_time
        print('-----------------------------------------------------')
        print('completed in ' + str(elapsed_time) + ' seconds')
        print('-----------------------------------------------------')

    return X, Xb_one
Example #14
def get_eops():
    """
    Download the Earth Orientation Parameters (EOPs) from the IERS data center
    and return them as a pandas dataframe; https://datacenter.iers.org/eop.php
    """
    url = 'ftp://hpiers.obspm.fr/iers/eop/eopc04/eopc04_IAU2000.62-now'
    # cache the download locally; destpath defaults to the current directory
    ds = np.DataSource()
    file = ds.open(url)
    array = np.genfromtxt(file, skip_header=14)
    headers = [
        'Year', 'Month', 'Day', 'MJD', 'x', 'y', 'UT1-UTC', 'LOD', 'dX', 'dY',
        'x Err', 'y Err', 'UT1-UTC Err', 'LOD Err', 'dX Err', 'dY Err'
    ]
    eop = pd.DataFrame(data=array, index=array[:, 3], columns=headers)
    return eop
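A sketch of the caching behaviour a DataSource destination directory provides (the cache directory name is made up; the URL is the one used above): the first open() downloads the file under that directory, mirroring the URL's host/path, and later calls reuse the local copy.

import numpy as np

ds = np.DataSource("./eop_cache")   # hypothetical persistent cache directory
url = 'ftp://hpiers.obspm.fr/iers/eop/eopc04/eopc04_IAU2000.62-now'

with ds.open(url) as f:             # downloads on the first call, reuses the cache afterwards
    eop = np.genfromtxt(f, skip_header=14)

print(ds.abspath(url))              # local path of the cached copy
print(eop.shape)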
Example #15
    def read(self, train, batch_size=1, num_samples=1, num_epochs=1):
        if train and np.DataSource().exists(self.tfrecord_name + "_train" + ".tfrecord"):
            tfrecord = self.tfrecord_name + "_train" + ".tfrecord"
        else:
            tfrecord = self.tfrecord_name + "_test" + ".tfrecord"

        dataset = tf.data.TFRecordDataset([self.load_path + tfrecord])
        dataset = dataset.map(self.decode)
        if train:
            dataset = dataset.shuffle(num_samples)
        dataset = dataset.repeat()
        dataset = dataset.batch(batch_size, drop_remainder=True)
        iterator = dataset.make_one_shot_iterator()

        return iterator.get_next()
Example #16
    def __init__(self):
        if not np.DataSource().exists(INBOUND_FILENAME):
            self._parse_raw_data()
        else:
            with open(INBOUND_FILENAME, 'rb') as inbound_file:
                self.inbound = pickle.load(inbound_file)
            with open(OUTBOUND_FILENAME, 'rb') as outbound_file:
                self.outbound = pickle.load(outbound_file)

        # Build the undirected adjacency dict
        self.undirect = dict()
        for key in self.inbound:
            self.undirect[key] = self.inbound[key]
            if key in self.outbound:
                self.undirect[key] |= self.outbound[key]
Example #17
def psf_beads_analysis():
    temp_dir = path.abspath(TEST_DATA_DIR)
    file_name = '20191206_100xOil_A647_Cy3_FITC_DAPI_ri-1.512_na-1.4_100nm_561_002_SIR.npy'
    file_url = 'http://dev.mri.cnrs.fr/attachments/download/2295/psf_beads_EM-488_MAG-40.npy'
    try:
        data = np.load(path.join(temp_dir, file_name))
    except FileNotFoundError:
        # not cached locally yet: download via DataSource and load from the returned file
        repos = np.DataSource(temp_dir)
        data = np.load(repos.open(file_url, 'rb'))

    analysis = psf_beads.PSFBeadsAnalysis()
    analysis.input.data = {'beads_image': data}
    analysis.set_metadata('theoretical_fwhm_lateral_res', 0.300)
    analysis.set_metadata('theoretical_fwhm_axial_res', 0.800)
    analysis.set_metadata('pixel_size', (.35, .06, .06))

    return analysis
Example #18
def argolight_b():
    temp_dir = path.abspath(TEST_DATA_DIR)
    file_name = '201702_RI510_Argolight-1-1_010_SIR_ALX.npy'
    file_url = 'http://dev.mri.cnrs.fr/attachments/download/2290/201702_RI510_Argolight-1-1_010_SIR_ALX.npy'
    try:
        data = np.load(path.join(temp_dir, file_name))
    except FileNotFoundError as e:
        repos = np.DataSource(temp_dir)
        repos.open(file_url)
        raise Exception from e

    analysis = argolight.ArgolightBAnalysis()
    analysis.input.data = {'argolight_b': data}
    analysis.set_metadata('spots_distance', 5)
    analysis.set_metadata('pixel_size', (.125, .39, .39))

    return analysis
Example #19
def psf_beads_analysis():
    temp_dir = path.abspath(TEST_DATA_DIR)
    file_name = 'psf_beads_EM-488_MAG-40.npy'
    file_url = ''
    try:
        data = np.load(path.join(temp_dir, file_name))
    except FileNotFoundError:
        # not cached locally yet: download via DataSource and load from the returned file
        repos = np.DataSource(temp_dir)
        data = np.load(repos.open(file_url, 'rb'))

    analysis = psf_beads.PSFBeadsAnalysis()
    analysis.input.data = {'beads_image': data}
    analysis.set_metadata('theoretical_fwhm_lateral_res', 0.300)
    analysis.set_metadata('theoretical_fwhm_axial_res', 0.800)
    analysis.set_metadata('pixel_size', (.35, .06, .06))

    return analysis
Example #20
    def __init__(self):
        #Load the similarity array
        array_url = 'https://s3-us-west-1.amazonaws.com/materialsintelligence/matminer_array.npy'
        ds = np.DataSource()
        ds.open(array_url)
        self.matminer_array = np.load(ds.abspath(array_url))

        #Other data
        mat2index_url = 'https://s3-us-west-1.amazonaws.com/materialsintelligence/mat2index.p'
        index2mat_url = 'https://s3-us-west-1.amazonaws.com/materialsintelligence/index2mat.p'
        scaler_url = 'https://s3-us-west-1.amazonaws.com/materialsintelligence/scaler.p'
        self.mat2index = pickle.load(ds.open(mat2index_url, 'rb'))
        self.index2mat = pickle.load(ds.open(index2mat_url, 'rb'))
        self.scaler = pickle.load(ds.open(scaler_url, 'rb'))

        #Mat parser
        self.parser = SimpleParser()
Example #21
def get_data(d):
    """Return train and test set."""
    data = np.DataSource()
    if d == 'train':
        out = data.open('http://archive.ics.uci.edu/ml/'
                        'machine-learning-databases/'
                        'poker/poker-hand-training-true.data')
    elif d == 'test':
        out = data.open('http://archive.ics.uci.edu/ml/'
                        'machine-learning-databases/'
                        'poker/poker-hand-testing.data')
    else:
        raise ValueError("Not valid data option.")

    X = np.loadtxt(out, delimiter=",")
    y = X[:, -1]
    X = X[:, :-1]
    return X, y
Example #22
def readFile(fileName):
	deviceList =[]
	numOfDevices = 0

	# Check if file exists in THIS directory or src/ directory
	if np.DataSource().exists(fileName):
		readfile = open(fileName, "r")
	else:
		readfile = open('src/'+fileName, "r")
	for line in readfile:
		# Remove leading and trailing spaces
		line = line.strip()

		# Skip comments and blank or very short lines
		if not line.startswith('#') and not line.startswith('//') and len(line) > 2:
			line = line.strip("\r\n")
			line = line.split(" ")
			deviceList.append([line[0], line[1], line[2]])
			numOfDevices += 1
	return numOfDevices, deviceList
Example #23
def open(filepath, edit_local=False):
    """Open any wt5 file, returning the top-level object (data or collection).

    Parameters
    ----------
    filepath : path-like
        Path to file.
        Can be either a local or remote file (http/ftp).
        Can be compressed with gz/bz2, decompression based on file name.
    edit_local : boolean (optional)
        If True, the file itself will be opened for editing. Otherwise, a
        copy will be created. Default is False.

    Returns
    -------
    WrightTools Collection or Data
        Root-level object in file.
    """
    filepath = os.fspath(filepath)
    ds = np.DataSource(None)
    if edit_local is False:
        tf = tempfile.mkstemp(prefix="", suffix=".wt5")
        with _open(tf[1], "w+b") as tff:
            with ds.open(str(filepath), "rb") as f:
                tff.write(f.read())
        filepath = tf[1]
    f = h5py.File(filepath, "r")
    class_name = f["/"].attrs["class"]
    name = f["/"].attrs["name"]
    f.close()
    if class_name == "Data":
        obj = wt_data.Data(filepath=str(filepath), name=name, edit_local=True)
    elif class_name == "Collection":
        obj = wt_collection.Collection(filepath=str(filepath), name=name, edit_local=True)
    else:
        obj = wt_group.Group(filepath=str(filepath), name=name, edit_local=True)

    if edit_local is False:
        setattr(obj, "_tmpfile", tf)
        weakref.finalize(obj, obj.close)
    return obj
Example #24
def read_PETSc_vec(file):
    import numpy
    dsource = numpy.DataSource()
    # open the file, skip the header id, read the vector length,
    # read the values, then close the file
    try:
        f = open(file, "rb")
    except OSError:
        print("Unexpected error opening:", file)
        raise

    numpy.fromfile(f, dtype=">i4", count=1)         # header id, discarded
    nvec = numpy.fromfile(f, dtype=">i4", count=1)  # vector length
    # load data and convert to little endian, important for np.dot
    v = numpy.fromfile(f, dtype=">f8", count=nvec[0]).astype('<f8')
    f.close()

    return v
Example #25
def load_url(url, augment):
    repo = np.DataSource()
    file = repo.open(url)
    mat_data = loadmat(file.name)  # Load the mat file data
    X = np.moveaxis(mat_data['X'], -1, 0)  # Get features from dataset
    y = mat_data['y'].flatten() - 1  # Get class from dataset

    # If augment option is turned on,
    # insert augmentation data to original data and class set
    if augment:
        dataList = []
        classList = []
        for img_element, class_element in zip(X, y):
            aug_images = img_augmenting(img_element)
            for aug in aug_images:
                dataList.append(aug)
                classList.append(class_element)
        X = np.vstack(dataList)
        y = np.vstack(classList).flatten()
    X = np.true_divide(X, 255.0)
    return X, y
Example #26
    def __init__(self):
        """
        The constructor for the Cluster Plot object
        :param entity_type: 'all' or 'materials'
        :param limit: number of most common entities to plot
        :param heatphrase: color according to similarity to this phrase
        :param wordphrases: filter to show only the specified phrases
        """

        ds = np.DataSource()
        # material_names_url = "https://s3-us-west-1.amazonaws.com/materialsintelligence/material_map_tsne_words.npy"
        material_coords_url = "https://s3-us-west-1.amazonaws.com/materialsintelligence/final_material_map_atl10_30_ee12_lr200.npy"

        # ds.open(material_names_url)
        ds.open(material_coords_url)

        self.ee = EmbeddingEngine()
        self.embs = self.ee.embeddings / self.ee.norm
        # materials_json = urlopen("https://s3-us-west-1.amazonaws.com/matstract/material_map_10_mentions.json")
        # materials_data = materials_json.read().decode("utf-8")
        # self.materials_tsne_data = json.loads(materials_data)["data"][0]
        # self.norm_matnames = [self.ee.dp.get_norm_formula(m) for m in self.materials_tsne_data["text"]]
        # self.matname2index = dict()
        # for i, label in enumerate(self.norm_matnames):
        #     self.matname2index[label] = i

        self.materials_tsne_data = np.load(ds.abspath(material_coords_url))
        formula_counts = dict()
        for formula in self.ee.formulas_full:
            formula_counts[formula] = 0
            for elem in self.ee.formulas_full[formula]:
                formula_counts[formula] += self.ee.formulas_full[formula][elem]

        mat_counts = sorted(formula_counts.items(), key=lambda x: x[1], reverse=True)
        mat_counts = [mat_count for mat_count in mat_counts if mat_count[1] >= 10]

        self.norm_matnames = [m[0] for m in mat_counts]
        self.matname2index = dict()
        for i, label in enumerate(self.norm_matnames):
            self.matname2index[label] = i
Example #27
def read(filepath, *, encoding="utf-8"):
    """Read headers from given filepath.

    Parameters
    ----------
    filepath : path-like or iterable of strings
        Path to file or iterable of strings.
    encoding : str
        Encoding to use when opening the file.
        No effect if iterable of strings given.

    Returns
    -------
    collections.OrderedDict
        Dictionary containing header information.
    """
    headers = collections.OrderedDict()
    ds = np.DataSource(None)
    # The following code is adapted from np.genfromtxt source
    try:
        if isinstance(filepath, os.PathLike):
            filepath = os.fspath(filepath)
        if isinstance(filepath, str):
            fhd = iter(ds.open(filepath, "rt", encoding=encoding))
        else:
            fhd = iter(filepath)
    except TypeError:
        raise TypeError(
            "filepath must be a path-like, list of strings, "
            "or generator. Got %s instead." % type(filepath)
        )
    for line in fhd:
        if line[0] == "#":
            split = re.split(": |:\t", line, maxsplit=1)
            key = split[0][2:]
            headers[key] = string2item(split[1])
        else:
            break  # all header lines are at the beginning
    return headers
Example #28
def process(self):
    while True:
        try:
            # step 2: load the trained model, or train and save it
            if np.DataSource().exists("model.h5"):
                from keras.models import load_model
                model = load_model('model.h5')
            else:
                model = train(features, labels, epochs=100)
                model.save('model.h5')
            # step 3: prediction - walk test_dir for new files (the path is set in the
            # predict function of the init file), so files are picked up and scored
            # continuously, giving near-real-time testing
            for root, dirs, files in os.walk(self.test_dir):
                for file in files:
                    filepath = os.path.join(root, file)
                    print(filepath)
                    pred = predict(model=model, data_path=filepath)
                    print_leaderboard(pred=pred, data_path=self.train_dir)
                    os.remove(filepath)
                    # option 1: delete the file after it is processed (os.remove above)
                    # option 2: use os.path.getmtime() to check whether the folder was updated
        except KeyboardInterrupt:
            # exit with Ctrl+C / Cmd+C
            break
Example #29
def Output(exportData, model, seed, result, noiseLevel, resultNoisy, filesPath,
           antStr, bioTap, selections, filename):
    # export csv of results

    # do not want to give students the clean data
    #data = {next_name:result[:,i] for i,next_name in enumerate(selections)}
    #df = pd.DataFrame.from_dict(data)
    #df.set_index('time', inplace=True)
    #df.to_csv(filesPath + "Results_Clean2.csv")

    data = {
        next_name: resultNoisy[:, i]
        for i, next_name in enumerate(selections)
    }
    df = pd.DataFrame.from_dict(data)
    df.set_index('time', inplace=True)
    df.to_csv(filesPath + filename + ".csv")

    # export csv file for importing into Biotapestry
    if bioTap != '':
        f2 = open(filesPath + "biotapestry.csv", 'w')
        f2.write(bioTap)
        f2.close()
    # export SBML model text
    if exportData:
        sbmlStr = model.getSBML()
        te.saveToFile(filesPath + 'OrigModel.xml', sbmlStr)
    # export Antimony model text
    if exportData:
        if np.DataSource().exists(filesPath + 'OrigAntimony.txt'):
            print('Warning: ' + filesPath +
                  'OrigAntimony.txt already exists! Preventing overwrite.')
        else:
            with open(filesPath + 'OrigAntimony.txt', 'w') as fh:
                fh.write(str(antStr))

    print('\nData Saved!\n')
Example #30
def main():
    trained_data = np.DataSource()

    if trained_data.exists('centroids.npz'):
        print("Read already trained data...\n")
        with np.load('centroids.npz') as data:
            centroids = data['centroids']
            labels = data['labels']
            classifier = NearestCentroidClassifier(centroids, labels)
            classifier.set_data(centroids, labels)
    else:
        print("Reading data...\n")
        images, labels = get_data('train-labels.idx1-ubyte',
                                  'train-images.idx3-ubyte')

        classifier = NearestCentroidClassifier(images, labels)

        print("Train classifier...\n")
        classifier.train()

    test_images, test_labels = get_data('t10k-labels.idx1-ubyte',
                                        't10k-images.idx3-ubyte')
    print("Classify test data...\n")

    classified_images = classifier.classify(test_images)

    right = 0

    print("Check results...\n")
    for i in range(len(classified_images)):
        if classified_images[i] == test_labels[i]:
            right = right + 1

    print(str(right / len(classified_images) * 100) +
          " percent of the test data were classified correctly.\n")
    print("\nFrom 10,000 digits, " + str(right) + " were classified correctly.")