def load_data(path):
    """
    load data from tiny-imagenet
    note that in validation set, label information is in val_annotations.txt
    """
    train_size = 100000
    val_size = 10000
    test_size = 10000

    # for training data set
    X_train = np.zeros((train_size, 3, 64, 64), dtype="uint8")
    # y_train = np.zeros((train_size,), dtype="str")
    y_train = np.chararray((train_size,), itemsize=10)

    # for validation data set
    X_val = np.zeros((val_size, 3, 64, 64), dtype="uint8")
    # y_val = np.zeros((val_size,), dtype="str")
    y_val = np.chararray((val_size,), itemsize=10)

    #path_train = os.path.join(path, 'train')
    #path_val = os.path.join(path, 'val')

    print "load training data..."
    for idx, (label, img) in enumerate(read_files(path,'train')):
        # reshape (64, 64, 3) -> (3, 64, 64)
        # some images are grey-scale, e.g. n04366367_182.JPEG
        # Grey-scale means all channels share the same intensity: set all three
        # RGB channels equal to the grey value and you get an RGB black-and-white
        # image.
        if img.ndim == 2:
            img = np.array([img[:, :], img[:, :], img[:, :]])
        elif img.ndim == 3:
            img = np.array([img[:, :, 0], img[:, :, 1], img[:, :, 2]])
        X_train[idx, :, :, :] = img
        y_train[idx] = label

    # change text label(n04366367, ...) to (0, 1, 2, ...)
    print "encoding labels for training data..."
    le = LabelEncoder()
    y_train = le.fit_transform(y_train)

    print "load validation data..."
    for idx, (label, img) in enumerate(read_files(path,'val')):
        # reshape (64, 64, 3) -> (3, 64, 64)
        # some images are grey-scale, e.g. n04366367_182.JPEG
        # Grey-scale means all channels share the same intensity: set all three
        # RGB channels equal to the grey value and you get an RGB black-and-white
        # image.
        if img.ndim == 2:
            img = np.array([img[:, :], img[:, :], img[:, :]])
        elif img.ndim == 3:
            img = np.array([img[:, :, 0], img[:, :, 1], img[:, :, 2]])
        X_val[idx, :, :, :] = img
        y_val[idx] = label

    # change text label(n04366367, ...) to (0, 1, 2, ...)
    print "encoding labels for validation data..."
    y_val = le.transform(y_val.tolist())

    return le, (X_train, y_train), (X_val, y_val)
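
# A minimal standalone sketch of the grey-scale handling above: a (64, 64)
# image is replicated across three identical channels to give the
# (3, 64, 64) layout stored in X_train.
import numpy as np

grey = np.random.randint(0, 256, (64, 64)).astype("uint8")
rgb = np.array([grey, grey, grey])          # same as the img.ndim == 2 branch
assert rgb.shape == (3, 64, 64)
assert (rgb[0] == rgb[1]).all() and (rgb[1] == rgb[2]).all()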
def crea_liste(month, year):
    calendario = calendar.Calendar().itermonthdays(year, month)
    day = ["Lunedi", "Martedi", "Mercoledi", "Giovedi", "Venerdi", "Sabato", "Domenica"]

    cont = 0
    tupla = []
    tupla2 = []
    mese = str(month) + "/" + str(year)

    for i in calendario:
        tupla.append(i)
        tupla2.append(day[cont % len(day)])
        cont += 1

    tupla3 = []
    tupla4 = []

    for i in xrange(0, len(tupla)):
        if tupla[i] != 0:
            tupla3.append(tupla[i])
            tupla4.append(tupla2[i])

    tupla = None
    tupla2 = None

    # Initialize the character matrix (itemsize 40, one row per day of the month)
    mat = numpy.chararray((len(tupla3), 7), itemsize=40)
    mat[:] = " "

    return [tupla4, mat, mese]
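
# Hedged usage sketch for crea_liste (assumes the module imports calendar and
# numpy, as the function above requires); the month/year values are illustrative.
giorni, mat, mese = crea_liste(3, 2020)
print(mese)        # "3/2020"
print(giorni[0])   # Italian weekday name of the 1st of the month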
def partition(data_set, target_set, training_ratio):
    # Number of targets (3 targets here, either 'a', 'b', or 'c')
    n_targets = len(np.unique(target_set))
    print "Number of target values: %d" % n_targets
    # Number of samples taken per target (23 in this example)
    n_samples = len(data_set) // n_targets
    print "Number of samples per target: %d" % n_samples
    # Size of training set
    training_samples = int(round(n_samples * training_ratio))
    print "Size of training set: %d" % training_samples
    # Size of test set
    test_samples = int(round(n_samples - training_samples))
    print "Size of test set: %d" % test_samples
    # Array to hold the targets for the training set
    train_target = np.chararray(int(n_targets*training_samples))
    # Array to hold the targets for the testing set
    test_target = np.chararray(int(n_targets*test_samples))
    # Matrix to hold the training data
    train_data = np.empty([len(train_target), len(data_set[1])])
    # Matrix to hold the test data
    test_data = np.empty([len(test_target), len(data_set[1])])
    # Initialize values for each array/matrix to its corresponding value
    for target in np.arange(n_targets):
        for i in np.arange(n_samples):
            if i<= training_samples-1:
                train_target[i+(target*training_samples-1)] = target_set[target*n_samples]
                train_data[i+(target*training_samples-1)] = data_set[i+(target*n_samples-1)]
            else:
                test_target[(target*test_samples)+n_samples-i-1] = target_set[target*n_samples]
                test_data[(target*test_samples)+n_samples-i-1] = data_set[i+(target*n_samples-1)]
    return train_data, train_target, test_data, test_target
# Example 4
def most_weighted(X, CV, n=10, save=False):
    """Finds the most weighted words within an array.

    Args
    ----
    X: ndarray
        Term-document array with books on each row and words
        for each column
    CV: vectorizer object
        Provides the vocabulary.
        Can either be a CountVectorizer or a TfidfVectorizer.
    n: int
        The top n weighted words
    save: bool
        Whether to save the result to a CSV file

    Returns
    -------
    out: ndarray
        Array of Strings
    """

    # You have a 2d array: rows are books, cols are words
    # You have a vocab dict with 'word': array_index
    vocab = CV.vocabulary_

    # Create zeros array of size vocab
    vocab_array = np.chararray((len(vocab),), itemsize=18)
    # Convert dict to an actual 1D array, where you have the right word at the right index
    for k, v in vocab.items():
        vocab_array[v] = k

    # Get the sorted indices
    ind = X.argsort(axis=1)

    out = np.chararray((ind.shape[0], n), itemsize=18)

    # For each row in ind
    for i in range(ind.shape[0]):
        # Grab the row from ind (this is the ordering you need to make it sorted)
        ind_row = ind[i, :]
        
        # Index your 1D words at indexes array with the row from ind - which puts it in order
        # (basically, sorts according to the counts from X)
        sorted_words = vocab_array[ind_row]

        # Grab the last n values (the most heavily weighted words)
        out[i, :] = sorted_words[-n:]

    # `abbrev` is assumed to be a module-level list of row labels, one per book
    top = pd.DataFrame(out, index=abbrev, columns=np.arange(n, 0, -1))

    if save:
        top.to_csv("top_{}_words.csv".format(n))

    return top
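
# Hedged usage sketch for most_weighted(); the corpus and the module-level
# `abbrev` row labels are made up here for illustration.
from sklearn.feature_extraction.text import CountVectorizer

abbrev = ["book_a", "book_b"]   # assumed row labels referenced inside most_weighted
docs = ["the cat sat on the mat by the door",
        "a dog ate the bone under a tree"]
CV = CountVectorizer()
X = CV.fit_transform(docs).toarray()   # dense term-document counts
print(most_weighted(X, CV, n=10))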
def visualize(rings):
	# the position of each ball is found using the equation of a circle
	theta = 2 * math.pi / 20
	radius = 5	
	xdata_left = np.zeros(20)
	ydata_left = np.zeros(20)
	xdata_right = np.zeros(20)
	ydata_right = np.zeros(20)
	plt.clf()	

	# get the positions for the left and right rings
	for ix in xrange(0,20):
		xdata_left[ix] = 5 + (radius * math.cos((ix + 5 * math.pi / 6) * theta))
		ydata_left[ix] = 5 + (radius * math.sin((ix + 5 * math.pi / 6) * theta))
	
		xdata_right[ix] = 12.08 + (radius * math.cos((ix + 63 * math.pi / 16) * -theta))
		ydata_right[ix] = 5.3 + (radius * math.sin((ix + 63 * math.pi / 16) * -theta))

	# set up the colors for the left and right ring
	colors_left = np.chararray(20)
	colors_right = np.chararray(20)
	for ix in xrange(0,20):
		if rings[0][ix] == 0:
			colors_left[ix] = 'r'
		elif rings[0][ix] == 1:
			colors_left[ix] = 'b'
		elif rings[0][ix] == 2:
			colors_left[ix] = 'y'
		elif rings[0][ix] == 3:
			colors_left[ix] = 'k'
		
		if rings[1][ix] == 0:
			colors_right[ix] = 'r'
		elif rings[1][ix] == 1:
			colors_right[ix] = 'b'
		elif rings[1][ix] == 2:
			colors_right[ix] = 'y'
		elif rings[1][ix] == 3:
			colors_right[ix] = 'k'

	# plot each ball individually to get the correct color
	for ix in xrange(0,20):
		plt.scatter(xdata_left[ix], ydata_left[ix], 1500, colors_left[ix])	
		plt.scatter(xdata_right[ix], ydata_right[ix], 1500, colors_right[ix])	

	plt.axis('equal')
	plt.show()
	plt.draw()
def bottom_up_longest_subsequence(x, y):
    m = len(x)
    n = len(y)

    cache = numpy.zeros((m + 1, n + 1))
    subsequences = numpy.chararray((m + 1, n + 1))
    subsequences[:] = ""

    for i in xrange(1, m + 1):
        for j in xrange(1, n + 1):
            up = cache[i - 1][j]
            left = cache[i][j - 1]

            if x[i - 1] == y[j - 1]:
                cache[i][j] = cache[i - 1][j - 1] + 1
                subsequences[i][j] = "\\"

            elif up >= left:
                cache[i][j] = up
                subsequences[i][j] = "^"
            else:
                cache[i][j] = left
                subsequences[i][j] = "<"

    return cache, subsequences
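
# A short companion sketch (assuming the function above is in scope): walking
# the backtracking arrows ("\" diagonal, "^" up, "<" left) from the bottom-right
# corner recovers one longest common subsequence. The byte-string comparisons
# match what the chararray stores under both Python 2 and 3.
def reconstruct_subsequence(x, y):
    cache, subsequences = bottom_up_longest_subsequence(x, y)
    i, j, out = len(x), len(y), []
    while i > 0 and j > 0:
        if subsequences[i][j] == b"\\":
            out.append(x[i - 1])
            i, j = i - 1, j - 1
        elif subsequences[i][j] == b"^":
            i -= 1
        else:
            j -= 1
    return "".join(reversed(out))

# e.g. reconstruct_subsequence("ABCBDAB", "BDCABA") returns a length-4 LCS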
# Example 7
def SAX_coding_function(PAA_in,alphabet_size):
    #Function to get the PAA coding of data
    #inputs:
    #-- PAA_in : Input data processed represented as PAA
    #-- alphabet_size : Number of levels for the coding
    #
    #Output:
    #-- SAX_result : Result of applying SAX on data

    #We obtain the SAX cut points by looking to a certain distribution:
    cut_points = read_levels_distribution(alphabet_size);
   
    #We initialize the outputs of SAX: levels and codes
    SAX_result_level = numpy.empty((PAA_in.size));
    SAX_result = numpy.chararray(PAA_in.size);
    
    #Equivalence level-string for SAX:
    string_equivalent = numpy.array(['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z']);
    
    #We go through the PAA data and we code it according to SAX:
    for i in range(PAA_in.size):
        #We check where, in the borders of the distribution, our sample is:
        SAX_result_level[i] = numpy.count_nonzero(cut_points <= PAA_in[i]);
        #We code the level:
        SAX_result[i] = string_equivalent[int(SAX_result_level[i]) - 1];
    write_SAX_result(SAX_result);
    return SAX_result
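
# Minimal illustration of the SAX level lookup above, with made-up breakpoints
# (the real ones come from read_levels_distribution, whose exact convention is
# assumed here to include the lower bound, matching the -1 offset above).
cut_points_demo = numpy.array([-numpy.inf, -0.67, 0.0, 0.67])   # 4 symbols
sample = 0.3
level = numpy.count_nonzero(cut_points_demo <= sample)          # -> 3
symbol = numpy.array(['a', 'b', 'c', 'd'])[level - 1]           # -> 'c'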
def LSISort(s):

	for col in range(s.shape[1] - 1, -1, -1):	# LSD order: process the rightmost column first

		counts = numpy.zeros(26+1).astype('int')

		# Get the frequency count of the characters
		for c in s[:,col]:
			counts[ord(c) - ord('a') + 1] += 1
		
		# Get cumulative frequencies
		for i in range(1,27):
			counts[i] = counts[i-1] + counts[i]

		# construct a copy 
		res = numpy.chararray(s.shape)

		# store in sorted order
		for row in range(s.shape[0]):
			c = s[row,col]
			c_ord = ord(c) - ord('a') + 1
			res[counts[c_ord-1],:] = s[row,:]
			counts[c_ord-1] += 1

		s = res

	return res
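
# Hedged usage sketch for LSISort: fixed-length lowercase strings loaded into
# an (n_words x word_length) chararray and sorted by repeated key-indexed
# counting passes. The word list is illustrative.
words = ["dab", "cab", "fad", "bad", "ace"]
arr = numpy.chararray((len(words), 3))
for r, w in enumerate(words):
    for c in range(3):
        arr[r, c] = w[c]
print(LSISort(arr))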
# Example 9
def cycle_decrypt(key, message, show=False):
    '''
    Perform the decryption algorithm on the given ciphertext. Note that the
    encryption and decryption steps of this columnar transposition differ, so
    we need two separate operations.
    :param str key: the transposition key
    :param str message: the ciphertext to decrypt
    '''
    plaintext      = ''
    size           = len(key)
    message_length = len(message)
    column_length  = message_length / size
    words          = [message[x:x + column_length] for x in range(0, len(message), column_length)]
    grid           = np.zeros(((message_length / size), size))  # Create zero grid
    order          = [(ord(key[x]), x) for x in range(size)]
    order.sort(key=lambda tup: tup[0])
    for i in range(size):
        column = words[i]
        letters = list(column)
        position = order[i][1]
        for j in range(message_length / size):
            grid[j, position] = ord(letters[j])
    for row in grid:
        chunk = ''
        for column in row:
            chunk += chr(int(column))
        plaintext += chunk
    if show:
        string_grid = np.chararray(grid.shape)
        for i in range(len(grid)):
            for j in range(len(grid[0])):
                string_grid[i, j] = chr(int(grid[i, j]))
        print string_grid
    return plaintext
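
# Hand-worked usage sketch: with key "ba" the ciphertext "EPHL" was constructed
# so that the columnar un-scrambling above recovers "HELP".
print(cycle_decrypt('ba', 'EPHL'))   # -> HELP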
# Example 10
def return_bpt_type(pdata):

    bpt_flag = np.chararray(pdata['oiii_hb'].shape[0],itemsize=12)

    #### 50th percentile determinations
    sf_line1 = 0.61 / (pdata['nii_ha'][:,0] - 0.05) + 1.3
    sf_line2 = 0.61 / (pdata['nii_ha'][:,0] - 0.47) + 1.19
    composite = (pdata['oiii_hb'][:,0] > sf_line1) & (pdata['oiii_hb'][:,0] < sf_line2)
    agn = pdata['oiii_hb'][:,0] > sf_line2

    #### from the chains
    for i, (oiii_hb,nii_ha) in enumerate(zip(pdata['oiii_hb_chain'],pdata['nii_ha_chain'])):
        sf_line1 = 0.61 / (nii_ha - 0.05) + 1.3
        sf_line2 = 0.61 / (nii_ha - 0.47) + 1.19

        ### 1 sigma composite
        composite_one = (oiii_hb > sf_line1) & (oiii_hb < sf_line2)
        if composite_one.sum()/float(composite_one.shape[0]) > 0.16:
            composite[i] = True
        
        ### 1 sigma AGN
        agn_one = oiii_hb > sf_line2
        if agn_one.sum()/float(agn_one.shape[0]) > 0.16:
            agn[i] = True
            #continue

    bpt_flag[:] = 'star-forming'
    bpt_flag[composite] = 'composite'
    bpt_flag[agn] = 'AGN'

    return bpt_flag
# Example 11
def toChararray(arr, aligned=False):
    arr = array(arr, dtype='|S')
    try:
        ndim, dtype_, shape = arr.ndim, arr.dtype, arr.shape
    except AttributeError:
        raise TypeError('arr is not a Numpy array')

    if ndim < 1:
        raise ValueError('arr.ndim should be at least 1')
    if dtype_.char != 'S':
        raise ValueError('arr must be a character array')

    if ndim != 2:
        n_seq = shape[0]
        l_seq = dtype_.itemsize
        new_arr = chararray((n_seq, l_seq))
        for i, s in enumerate(arr):
            for j in range(l_seq):
                if j < len(s):
                    new_arr[i, j] = chr2(s[j])
                else:
                    if aligned:
                        raise ValueError('sequences in arr do not have the same lengths')
                    new_arr[i, j] = '.'
    else:
        new_arr = array(arr, dtype='|S1')
    return new_arr
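
# A rough standalone illustration of the same padding idea using plain numpy
# (the real function relies on this module's chr2 helper and numpy imports):
import numpy as np
seqs = ["ACGT", "AC"]
l_seq = max(len(s) for s in seqs)
padded = np.chararray((len(seqs), l_seq))
padded[:] = "."
for i, s in enumerate(seqs):
    for j, ch in enumerate(s):
        padded[i, j] = ch
# padded[1] now reads A C . .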
# Example 12
def main():
  """ Updates given HDF5 with readme text provided in a text file.
      Text gets saved as attribute "readme" in the root group.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument("--h5file", help="HDF5 File to be updated")
  parser.add_argument("--readme", help="Text file with readme content")
  
  args = parser.parse_args()

  if not args.h5file:
     print("No HDF5 given")
     return -1
  if not args.readme:
     print("No readme file given")
     return -1

  f = h5py.File(args.h5file, 'a')
 

  with open(args.readme, 'r', encoding="latin-1") as readme_file:
      text = readme_file.read()
      char_array = np.chararray((), itemsize=len(text))
      char_array[()] = text
      #print(char_array)
      f.attrs.create('readme', char_array)
  f.close()
  print("bye")
# Example 13
    def create_iso(fileList, ageList, rot=True):
        """
        Given a set of isochrone files downloaded from
        http://obswww.unige.ch/Recherche/evoldb/index/Isochrone/, put in correct
        iso.fits format for parse_iso code.

        fileList: list of downloaded isochrone files (could be one)
    
        ageList: list of lists of ages associated with each file in filelist.
        MUST BE IN SAME ORDER AS ISOCHRONES IN FILE! Also needs to be in logAge
    
        rot=True: assumes that the models are rotating; will add the appropriate column
    
        This code writes the individual files, which is then easiest to combine by hand
        in aquamacs 
        """
        # Read each file in fileList individually, add necessary columns
        for i in range(len(fileList)):
            t = Table.read(fileList[i],format='ascii')
            ages = ageList[i]

            # Find places where new models start; mass here is assumed to be 0.8
            start = np.where(t['M_ini'] == 0.8)

            # Now, each identified start is assumed to be associated with the
            # corresponding age in ages        
            if len(start[0]) != len(ages):
                print 'Ages mismatched in file! Quitting...'
                return

            age_arr = np.zeros(len(t))

        
            for j in range(len(start[0])):
                low_ind = start[0][j]
                # Deal with case at end of file
                if (j == len(start[0])-1):
                    high_ind = len(t)
                else:
                    high_ind = start[0][j+1]

                ind = np.arange(low_ind, high_ind, 1)
                age_arr[ind] = ages[j]

            # Add age_arr column to column 1 in the isochrone, as well as a column
            # signifying rotation
            col_age = Column(age_arr, name = 'logAge')
            rot_val = np.chararray(len(t))
            rot_val[:] = 'r'
            if not rot:
                rot_val[:] = 'n'
            
            col_rot = Column(rot_val, name='Rot')
        
            t.add_column(col_rot, index=0)
            t.add_column(col_age, index=0)

            t.write('tmp'+str(i)+'.fits')

        return
# Example 14
    def __init__(self, filename):
        """
        Args:
            filename_list (str): list of strings with filenames. These filenames are expected to be in the
            FITS format for targets.
        """
    
        
        hdulist = fits.open(filename)        
        self.filename = filename
        self.ra = hdulist[1].data['RA']
        self.dec = hdulist[1].data['DEC']
        self.type = hdulist[1].data['OBJTYPE']
        self.id = np.int_(hdulist[1].data['TARGETID'])
        self.tile_ra = hdulist[1].header['TILE_RA']
        self.tile_dec = hdulist[1].header['TILE_DEC']
        self.tile_id = hdulist[1].header['TILE_ID']
        self.n = np.size(self.ra)
        fc = desimodel.focalplane.FocalPlane(ra=self.tile_ra, dec=self.tile_dec)
        self.x, self.y = fc.radec2xy(self.ra, self.dec)

        # this is related to the fiber assignment 
        self.fiber = -1.0 * np.ones(self.n, dtype='i4')

        # This section is related to the number of times a galaxy has been observed,
        # the assigned redshift and the assigned type
        self.n_observed = np.zeros(self.n, dtype='i4')
        self.assigned_z = -1.0 * np.ones(self.n)
        self.assigned_type =  np.chararray(self.n, itemsize=8)
        self.assigned_type[:] = 'NONE'
    def test__try_fl_mv_right__try_intify_mv_left(self):
        cs = ColSplitter()
        cs._token_col_lengths = [-1, -1, 3]

        charr = np.chararray((3, 3), 5)
        charr[0, 0] = cs._null
        charr[0, 1] = '1.0'
        charr[0, 2] = cs._null

        charr[1, 0] = cs._null
        charr[1, 1] = '12.0'
        charr[1, 2] = cs._null

        charr[2, 0] = cs._null
        charr[2, 1] = '0.34'
        charr[2, 2] = cs._null

        res = cs._try_fl_mv_right__try_intify_mv_left(charr, 0, 1)
        self.assertEqual(cs._null, res[0, 0])
        self.assertEqual(cs._null, res[0, 1])
        self.assertEqual(b'1.0', res[0, 2])

        res = cs._try_fl_mv_right__try_intify_mv_left(charr, 1, 1)
        self.assertEqual(b'12', res[1, 0])
        self.assertEqual(cs._null, res[1, 1])
        self.assertEqual(cs._null, res[1, 2])

        res = cs._try_fl_mv_right__try_intify_mv_left(charr, 2, 1)
        self.assertEqual(cs._null, res[2, 0])
        self.assertEqual(b'0.34', res[2, 1])
        self.assertEqual(cs._null, res[2, 2])
    def test__merge_cols(self):
        cs = ColSplitter()
        cs._token_col_types = [cs._int, cs._float]
        cs._token_col_lengths = [-1, -1]

        charr = np.chararray((7, 2), 5)
        charr[0, 0] = cs._null
        charr[1, 0] = '23'
        charr[2, 0] = cs._null
        charr[3, 0] = cs._null
        charr[4, 0] = '42'
        charr[5, 0] = '123'
        charr[6, 0] = cs._null

        charr[0, 1] = '12.0'
        charr[1, 1] = cs._null
        charr[2, 1] = '13.0'
        charr[3, 1] = cs._null
        charr[4, 1] = cs._null
        charr[5, 1] = cs._null
        charr[6, 1] = cs._null

        res = cs._merge_cols(charr)
        # self.assertEqual((5, 1), res.shape)
        self.assertEqual(b'12', res[0, 0])
        self.assertEqual(b'23', res[1, 0])
        self.assertEqual(b'13', res[2, 0])
        self.assertEqual(cs._null, res[3, 0])
        self.assertEqual(b'42', res[4, 0])
# Example 17
def depth_count():
    ops = ['+','-','*','/','^']
    parentheses=['(',')']
    input = raw_input('*')
    j=0
    k=0
    l=0
    processed = np.chararray((len(input),1))
    oplist = []
    parlist = []
    for e in input:
        if e in ops:
            oplist.append(e)
            processed[j,0]=k
            k = k + 1
        else:
            if e in parentheses:
                oplist.append(0)
                parlist.append(e)
                processed[j,0]=l
                l = l + 1
            else:
                processed[j,0]=e
        
        j=j+1
                
    print processed
    print oplist
    print parlist
    return k
    def _get_dates(self, uid,lim):

        locations_index = np.transpose(self._locations[:, USER_ID] == int(uid))
        date_locations = self._locations[locations_index, :]
        user_locations = date_locations[:, (LATITUDE, LONGITUDE,ORDINAL_DATE)]
        user_location_size=np.shape(user_locations)
        datedanszone=[]
        for i in range(int(user_location_size[0])):
            contains= self._inside(lim,user_locations[i,0],user_locations[i,1])
            if contains:
                datedanszone.append(user_locations[i,2])
        datedanszone=list(set(datedanszone))

        if len(datedanszone)==0:
            return {}

        datematrix=np.chararray((len(datedanszone),2),itemsize=30)
        week=['Lundi','Mardi','Mercredi','Jeudi','Vendredi','Samedi','Dimanche']
        month=['Janvier','Fevrier','Mars','Avril','Mai','Juin','Juillet','Aout','Septembre','Octobre','Novembre','Decembre']
        i=0
        for d in datedanszone:
            datematrix[i,0]=int(d)
            datedisp=dt.datetime.fromordinal(int(d))
            datematrix[i,1]=week[datedisp.weekday()]+" "+str(datedisp.day)+" "+month[datedisp.month-1]+" "+str(datedisp.year)
            i=i+1
        
        return {
                "availableOptionsForDate": [{"date":d,"datedisp":dd} for d,dd in datematrix ],
                "selectedOptionfordate": {"date":datedanszone[0]}
                }
# Example 19
def jdToFull (jd, form='H'):
    """Return a textual representation of a Julian date.

:arg double jd: a Julian date
:arg character form: the output format, described below.
  Defaults to "H".
:returns: the textualization of the Julian date
:raises: :exc:`MiriadError` in case of buffer overflow
  (should never happen)

The possible output formats are:

==========  ====================================
Character   Result
==========  ====================================
*H*         "yyMONdd:hh:mm:ss.s" ("MON" is the three-letter abbreviation
            of the month name.)
*T*         "yyyy-mm-ddThh:mm:ss.s" (The "T" is literal.)
*D*         "yyMONdd.dd"
*V*         "dd-MON-yyyy" (loses fractional day)
*F*         "dd/mm/yy" (loses fractional day)
==========  ====================================
"""

    calday = N.chararray (120)
    _miriad_f.julday (jd, form, calday)

    for i in xrange (calday.size):
        if calday[i] == '':
            return calday[:i].tostring ()

    raise MiriadError ('Output from julday exceeded buffer size')
# Example 20
def main():
    Data()  # Initialize data set
    attribute_indices = np.arange(1, 23)
    obj = np.arange(4062)
    tree = build_dt(attribute_indices, obj)
    if Data.display_tree_flag:
        tree_list_by_level = []
        list_decision_tree(tree_list_by_level, tree, 1)
        display_decision_tree(tree_list_by_level)
    test_num = 2031
    result = np.chararray(test_num)
    test_obj = np.arange(test_num)
    classify(tree, test_obj, result)
    if Data.mode == 'i':
        print "Entropy,",
    else:
        print "Misclassification Error,",
    print "Confidence Level: {}".format(args.confidence_level),
    if not Data.validation_flag:
        print "Accuracy: {}".format(np.sum(result == Data.test[test_obj, 0]) / float(test_num))
    else:
        print "Result printed in validation_result.txt."
        f = open('validation_result.txt', 'w')
        for x in result:
            print >>f, x
        f.close()
def combineTechnicalIndicators(ticker):
    dates, prices = getDateAndPrice(ticker)
    np_dates = np.chararray(len(dates), itemsize=len(dates[0]))
    for day in range(len(dates)):
        np_dates[day] = dates[day]

    percentChange = calcDailyPercentChange(prices)
    vol = calc30DayVol(percentChange)
    RSI = calcRSI(prices)


    if ticker == PREDICTED:
        np_prices = np.array(prices)
        label = np.zeros_like(np_prices)

    #create label for price of SPY
        for x in range(len(np_prices[:-lagTime])):
            print x
            if np_prices[x] < np_prices[x + lagTime]:
                label[x] = 1
            else:
                label[x] = 0
        features = np.column_stack((np_dates,  percentChange, vol, RSI, label))
        headers = ['date', 'return_'+ ticker, 'vol_'+ ticker, 'RSI_'+ ticker, 'label']
    else:
        features = np.column_stack((np_dates, percentChange, vol, RSI))
        headers = ['date', 'return_'+ ticker, 'vol_'+ ticker, 'RSI_'+ ticker]

    df_features = pd.DataFrame(features, columns=headers)
    print df_features[25:35]
    return df_features
# Example 22
 def __init__(self,cards,parent=None):
     super(header_view,self).__init__(parent)
     self.setupUi(self)
     font = QFont("Courier",11)
     font.setFixedPitch(1)
     self.cardlist.setFont(font)
     key = []
     keymax = 0
     value = []
     extra = []
     for index in range(len(cards)):
         item = cards[index]
         #item = str(item).split('=')
         one = str(item[0]).strip()
         two = str(item[1]).strip()
         try:
             extra.append(str(item[2]).strip())
         except:
             extra.append('')
         key.append(one)
         value.append(two)
         if len(one) > keymax:
             keymax = len(one)
     for k in range(len(key)):
         # pad the key with spaces so the '=' signs line up
         string = (keymax+5-len(key[k]))*' '
         res = key[k]+str(string)+'=  '+value[k]+' / '+extra[k]
         temp = QListWidgetItem(res)
         temp.setTextAlignment(1)
         self.cardlist.addItem(temp)
     QObject.connect(self.okbutton,SIGNAL('clicked()'),self.close)
     self.exec_()
def keyIndexSort(s):

	counts = numpy.zeros(26+1).astype('int')

	# Get the frequency count of the characters
	for c in s:
		counts[ord(c) - ord('a') + 1] += 1
	
	# Get cumulative frequencies
	for i in range(1,27):
		counts[i] = counts[i-1] + counts[i]

	# construct a copy 
	res = numpy.chararray(len(s)) 

	# store in sorted order
	for c in s:
		c_ord = ord(c) - ord('a') + 1
		res[counts[c_ord-1]] = c
		counts[c_ord-1] += 1

	if type(s) == str:
		res = res.tostring()

	return res
# Example 24
	def calculateAllelicCount(self):
		self.appLogger.info("Calculating Allelic counts")
		sequenceDescriptions=self.msa.keys()
		sequenceSize=0; nSequences=0;
		nSequences=len(sequenceDescriptions)
		for sequence in self.msa:
			sequenceSize=max(sequenceSize,len(self.msa[sequence]))

		matrix=np.chararray((nSequences,sequenceSize), itemsize=1)
		for row in range(0,nSequences):
			key=sequenceDescriptions[row]
			seq=self.msa[key]
			for pos in range(0, sequenceSize):
				matrix[row,pos]=seq[pos]
		self.appLogger.warning("Sequence size: {} and matrix cols: {}".format(sequenceSize,matrix.shape[1]))
		A=np.zeros(sequenceSize); C=np.zeros(sequenceSize);
		T=np.zeros(sequenceSize); G=np.zeros(sequenceSize);
		N=np.zeros(sequenceSize); GAP=np.zeros(sequenceSize);
		TOTAL=np.zeros(sequenceSize)
		for pos in range(0,sequenceSize):
			c=Counter(matrix[:,pos])
			A[pos]=c["A"]
			C[pos]=c["C"]
			T[pos]=c["T"]
			G[pos]=c["G"]
			N[pos]=c["N"]
			GAP[pos]=c["-"]
			if A[pos] > 0: TOTAL[pos]+=1
			if C[pos] > 0: TOTAL[pos]+=1
			if T[pos] > 0: TOTAL[pos]+=1
			if G[pos] > 0: TOTAL[pos]+=1
		return A,C,G,T,N,GAP,TOTAL,nSequences,sequenceSize
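
# Isolated sketch of the per-column counting used above: a Counter over one
# column of a (sequences x positions) chararray. Under Python 2 the keys are
# plain strings ('A', 'T'); under Python 3 they would be byte strings.
import numpy as np
from collections import Counter
demo = np.chararray((3, 2), itemsize=1)
demo[0] = ['A', 'C']
demo[1] = ['A', 'G']
demo[2] = ['T', 'C']
print(Counter(demo[:, 0]))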
# Example 25
def llz2utm(lon,lat,projection_zone=None):
    '''
    Convert lat,lon to UTM
    '''
    from numpy import zeros,where,chararray
    import utm
    from pyproj import Proj
    from scipy.stats import mode
    
    x=zeros(lon.shape)
    y=zeros(lon.shape)
    zone=zeros(lon.shape)
    b=chararray(lon.shape)
    if projection_zone is None:
        #Determine most suitable UTM zone
        for k in range(len(lon)):
            #x,y,zone[k],b[k]=utm.from_latlon(lat[k],lon[k]-360)
            x,y,zone[k],b[k]=utm.from_latlon(lat[k],lon[k])
        zone_mode=mode(zone)
        i=where(zone==zone_mode)[0]
        letter=b[i[0]]
        z=str(int(zone[0]))+letter
    else:
        z=projection_zone
    print z
    p = Proj(proj='utm',zone=z,ellps='WGS84')
    x,y=p(lon,lat)
    return x,y
# Example 26
def findIndex(ids, bedids): 
    N1 = ids.shape[0]
    com1 = SP.chararray(N1, itemsize=30)
    #com1 = {}
    N2 = bedids.shape[0]
    com2 = {}
    for i in range(N1):
        com1[i] = ids[i,0] + "_" + ids[i,1] 
        #com1[ ids[i,0] + "_" + ids[i,1] ] = i
    for i in range(N2):
        com2[ bedids[i,0] + "_" + bedids[i,1] ] = i
    if (N1 <= N2):
        index = SP.zeros(N1)
        count = 0
        for i in range(N1):
            try:
                ind = com2[ com1[i] ]
            except KeyError:
                continue
            index[count] = ind
            count = count + 1
    else:
        index = SP.zeros(N2)
        count = 0
        for i in range(N2):
            try:
                ind = com2[ com1[i] ]
            except KeyError:
                continue
            index[count] = ind
            count = count + 1
    index = index[0:count]
    #index = index[index != -1]
    return index.astype('I')
# Example 27
def generateData(fname,bidders,test=False):
    data=pd.read_csv(fname)
    data=data.values

    data_x=np.zeros((len(data),feat_size))
    data_y=np.zeros(len(data))
    id=np.chararray(len(data),itemsize=37)
    for i in tqdm(range(len(data))):
        gc.collect()
        bidder_name=data[i][0]
        id[i]=bidder_name
        if bidder_name in bidders:
            bid_info=bidders[bidder_name]
            data_x[i,:]=bidderFeatEng(bid_info)
            '''
            for j in range(min(len(bid_info),feat_size)):
                idx = 7*j
                for k in range(7):
                    data_x[i,idx+k]=bid_info[j][k+1]
            '''
            if not test:
                data_y[i]=data[i][3]

    if not test:
        return data_x,data_y,id
    else:
        return data_x,id
def main():

    args = parse_args()

    N = args.N          # number of sequences to generate (e.g. 1000)
    out = args.out
    #out='/home/vorberg/test.psc'
    #N=1000


    msa = np.chararray((N, 4))

    set_1 = [io.AMINO_ACIDS[a] for a in np.random.choice(range(1, 11), N/2)]
    set_2 = [io.AMINO_ACIDS[a] for a in np.random.choice(range(11, 21), N/2)]


    set_3 = [io.AMINO_ACIDS[21-io.AMINO_INDICES[a]] for a in set_2]
    set_4 = [io.AMINO_ACIDS[21-io.AMINO_INDICES[a]] for a in set_1]

    msa[:, 0] = set_4 + set_3
    msa[:, 1] = set_1 + set_2

    msa[:, 2] = set_2 + set_1
    msa[:, 3] = set_3 + set_4

    np.savetxt(out, msa, delimiter='', newline='\n', fmt='%s')
# Example 29
def _setup_krd(*args):
    krd = np.chararray((10, 8), order='F')
    for i in xrange(10):
        _chrcopy(krd, i, "        ")
    for i, arg in enumerate(args):
        _chrcopy(krd, i, arg)
    return krd
# Example 30
def levelGen(size):
	world = np.chararray((size,size))
	world[:]='.'
	world[0,]='|'
	world[-1,]='|'
	world[:,0]='|'
	world[:,-1]='|'
	def r(): return random.randint(1,size-2)
	
	area = size**2
	density = float(world.count('|').sum()-size*4)/float(area)
	'''
	while density <0.15:
		x,y=r(),r()
		for i in range(random.randint(1,7)):
			world[x%size,y%size]='|'
			next = random.choice([-1,1])
			if random.randint(0,1):
				x+=next
			else: 
				y+=next
			density = float(world.count('|').sum()-size*4)/float(area)
	'''
	#sets spawnpoint
	world[r(),r()]=random.choice(['N','E','S','W'])

	world=  '\n'.join(map(''.join,world.tolist()))
	return  world
# Example 31
import sys
import pickle
import warnings

import numpy as np
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier

warnings.filterwarnings("ignore", category=DeprecationWarning)

did = sys.argv[1]
did = str(did)

genre_list = ['R&B', 'Country', 'Rap', 'Reggae', 'Religious', 'Metal']
sample_song_dataset = pickle.load(
    open('song_vectors_for_genre_dataset_' + did + '.pickle', 'rb'))
train = pickle.load(open("../dataset/80_20_train_" + did + ".pickle", "rb"))
test = pickle.load(open("../dataset/80_20_test_" + did + ".pickle", "rb"))
id_detail = pickle.load(open("id_detail_" + did + ".pickle", "rb"))
count = 0
ab = np.random.rand(len(sample_song_dataset['train']), 300)
label = np.chararray(len(sample_song_dataset['train']), itemsize=9)
label[:] = ''

for songId in sample_song_dataset['train']:
    j = 0
    for feature in sample_song_dataset['train'][songId]['vector']:
        ab[count][j] = feature
        j += 1
    label[count] = train[id_detail[count]]['genre']
    count += 1

clf = SVC(random_state=10)
clf.fit(ab, label)

print("Total Count:" + str(count) + "  Original Count:" +
      str(len(sample_song_dataset['train'])))
# Example 32
n1 = n[:,1].reshape(row_no, col_no)
n2 = n[:,2].reshape(row_no, col_no)
n3 = n[:,3].reshape(row_no, col_no)

n0[n0 == 80] = 0
n1[n0 == 80] = 0
n2[n0 == 80] = 0
n3[n0 == 80] = 0

ns = (n0 + n1 + n2 + n3) / 4
nm = np.maximum.reduce([n0,n1,n2,n3])

height = n0.shape[0]
width = n0.shape[1]

best = np.chararray((height, width), itemsize=1, unicode=True)
max_q = np.zeros((height, width))
print n0.shape
print best.shape
for i in xrange(height):
    for j in xrange(width):
        if n0[i,j] == 80 or n0[i,j] == -10:
            best[i,j] = 'B'
            continue

        up = n0[i,j]
        left = n1[i,j]
        down = n2[i,j]
        right = n3[i,j]
        
        highest = max(up,left,down,right)
# Example 33
def plot_raters(dataframe, ax=None, width=101, size=0.40):
    raters = sorted(dataframe.columns.ravel().tolist())
    dataframe["notnan"] = np.any(np.isnan(dataframe[raters]),
                                 axis=1).astype(int)
    dataframe = dataframe.sort_values(by=["notnan"] + raters, ascending=True)
    for rater in raters:
        dataframe[rater] = dataframe[[rater]].astype(str)

    matrix = dataframe.as_matrix()
    nsamples, nraters = dataframe.shape
    matrix = fill_matrix(matrix, width)

    nblocks = 1
    if matrix.shape[0] > width:
        matrices = []
        nblocks = (matrix.shape[0] // width) + 1

        nas = np.chararray((width, 1), itemsize=3)
        nas[:] = "n/a"
        for i in range(nblocks):
            if i > 0:
                matrices.append(nas)
            start_index = i * width
            stop_index = (i + 1) * width
            matrices.append(matrix[start_index:stop_index, ...])

        matrices[-1] = fill_matrix(matrices[-1], width)
        matrix = np.hstack(tuple(matrices))

    palette = {
        "1.0": "limegreen",
        "0.0": "dimgray",
        "-1.0": "tomato",
        "n/a": "w",
    }

    ax = ax if ax is not None else plt.gca()

    # ax.patch.set_facecolor('gray')
    ax.set_aspect("equal", "box")
    ax.xaxis.set_major_locator(plt.NullLocator())
    ax.yaxis.set_major_locator(plt.NullLocator())
    nrows = ((nsamples - 1) // width) + 1
    xlims = (-14.0, width)
    ylims = (-0.07 * nraters, nrows * nraters + nraters * 0.07 + (nrows - 1))

    ax.set_xlim(xlims)
    ax.set_ylim(ylims)

    offset = 0.5 * (size / 0.40)
    for (x, y), w in np.ndenumerate(matrix):
        if w not in list(palette.keys()):
            w = "n/a"

        color = palette[w]
        rect = plt.Circle([x + offset, y + offset],
                          size,
                          facecolor=color,
                          edgecolor=color)
        ax.add_patch(rect)

    # text_x = ((nsamples - 1) % width) + 6.5
    text_x = -8.5
    for i, rname in enumerate(raters):
        nsamples = sum(dataframe[rname] != "n/a")
        good = 100 * sum(dataframe[rname] == "1.0") / nsamples
        bad = 100 * sum(dataframe[rname] == "-1.0") / nsamples

        text_y = 1.5 * i + (nrows - 1) * 2.0
        ax.text(
            text_x,
            text_y,
            "%2.0f%%" % good,
            color="limegreen",
            weight=1000,
            size=16,
            horizontalalignment="right",
            verticalalignment="center",
            transform=ax.transData,
        )
        ax.text(
            text_x + 3.50,
            text_y,
            "%2.0f%%" % max((0.0, 100 - good - bad)),
            color="dimgray",
            weight=1000,
            size=16,
            horizontalalignment="right",
            verticalalignment="center",
            transform=ax.transData,
        )
        ax.text(
            text_x + 7.0,
            text_y,
            "%2.0f%%" % bad,
            color="tomato",
            weight=1000,
            size=16,
            horizontalalignment="right",
            verticalalignment="center",
            transform=ax.transData,
        )

    # ax.autoscale_view()
    ax.invert_yaxis()
    plt.grid(False)

    # Remove and redefine spines
    for side in ["top", "right", "bottom"]:
        # Toggle the spine objects
        ax.spines[side].set_color("none")
        ax.spines[side].set_visible(False)

    ax.spines["left"].set_linewidth(1.5)
    ax.spines["left"].set_color("dimgray")
    # ax.spines["left"].set_position(('data', xlims[0]))

    ax.set_yticks([0.5 * (ylims[0] + ylims[1])])
    ax.tick_params(axis="y", which="major", pad=15)

    ticks_font = FontProperties(
        family="FreeSans",
        style="normal",
        size=20,
        weight="normal",
        stretch="normal",
    )
    for label in ax.get_yticklabels():
        label.set_fontproperties(ticks_font)

    return ax
# Example 34
def world(n, m):
    grid = np.chararray((n, m))
    grid = [['-' for j in i] for i in grid]
    return grid
#converting it to a dictionary and storing it to a list
#tweets = [i.asdict() for i in t]
#print tweets[0]

pub_tweets = []

for tweet in tweets:
    pub_tweets.append(tweet.text)

#pub_tweets =public_tweets.encode('ascii','ignore')

print pub_tweets

tweet_count = len(pub_tweets)
senti = np.chararray((1, tweet_count), itemsize=8)
emotion = np.empty([1, tweet_count])

for i in range(tweet_count):
    analysis = TextBlob(pub_tweets[i])

    emotion[0, i] = analysis.sentiment.polarity  #storing polarity for tweets
    #sentiment according to polarity thresholds of negative, 0 and positive
    #values respectively
    if emotion[0, i] < 0:
        senti[0, i] = 'negative'
    elif emotion[0, i] == 0:
        senti[0, i] = 'neutral'
    else:
        senti[0, i] = 'positive'
# Example 36
Counting fields starts from 1.

"""

while True:
    try:
        game_size = int(raw_input("Size of game field: "))
        break
    except ValueError:
        print "You have to give an intiger!"

minesweeper_field = np.random.randint(6, size=(game_size, game_size))
for x in np.nditer(minesweeper_field, op_flags=['readwrite']):
    if x > 1: x[...] = 0
game_filed = np.chararray((game_size, game_size))
game_filed[:] = "?"

while True:
    print game_filed

    chosen_field = (raw_input("Choose field> ")).split(",")
    chosen_field[0] = int(chosen_field[0]) - 1
    chosen_field[1] = int(chosen_field[1]) - 1

    if minesweeper_field[chosen_field[0], chosen_field[1]] == 1:
        print "GAME OVER!"
        print minesweeper_field
        break

    try:
# Example 37
 def __init__(self, x, y):
     self._x = x
     self._y = y
     self._image = np.chararray((x, y))
     self._color = ''
     self.can_die = False
# Example 38
# crater name and buffer extent
fieldname1 = arcpy.ValidateFieldName("CRATER_ID")
fieldname2 = arcpy.ValidateFieldName("BUFFER_TXT")

# add fields
arcpy.AddField_management(infile, fieldname1, "TEXT", "", "", 30)

# get the number of rows in infile
n = int(arcpy.GetCount_management(infile)[0])

# prepare empty arrays
diam = np.ones(n)
x_coord = np.ones(n)
y_coord = np.ones(n)
crater_id = np.chararray(n, itemsize=30)
buffer_txt = np.chararray(n, itemsize=30)

#crater_id_list = ['flamsteed_s', 'herigonius_k']

#crater_id_list = ['flamsteed_s', 'herigonius_k', 'unnamed_0000' ,'encke_x',
#                  'lassell_d','unnamed_0001','samir','unnamed_0002','unnamed_0003',
#                  'unnamed_0004','unnamed_0005','unnamed_0006','unnamed_0007','unnamed_0008',
#                  'unnamed_0009','unnamed_0010','unnamed_0011','unnamed_0012','unnamed_0013']

#crater_id = np.array(crater_id_list)

with arcpy.da.UpdateCursor(
        infile, ["Diam_km", "CRATER_ID", "x_coord", "y_coord"]) as cursor:
    ix = 0
    for row in cursor:
        csv_as_list = list(reader)
    # reader = unicode_csv_reader(open(source_data_path))
    #     csv_as_list = list(reader)
    return np.asarray(csv_as_list)


csv_as_ndarray = open_csv_as_numpy()
csv_as_df = pd.DataFrame(csv_as_ndarray[1:]).sort_values([2])
size = csv_as_df.groupby(2).count().min()[0]        # sample size
replace = False  # with replacement
fn = lambda obj: obj.loc[np.random.choice(obj.index, size, replace),:]
csv_as_df = csv_as_df.groupby(2, as_index=False).apply(fn)
csv_as_ndarray = np.vstack((csv_as_ndarray[0], csv_as_df))

new_shape = [batch_size, class_names.shape[0]]
formatted_ndarray = np.chararray(new_shape)
formatted_ndarray[:] = '0'
num_of_batches = int(math.ceil(csv_as_ndarray.shape[0] / float(batch_size)))
fout = open(output_path, 'ab')
for i in range(num_of_batches):  # include the final, possibly partial batch
    lo = i*batch_size
    if i < num_of_batches - 1:
        hi = (i+1)*batch_size
    elif i == num_of_batches - 1:
        hi = csv_as_ndarray.shape[0]
    output_buffer = formatted_ndarray[:hi-lo].astype('U256')
    if i == 0:
        output_buffer[0] = np.array(class_names).astype('U256')
    for j in range(0, class_names.shape[0]):
        output_buffer[csv_as_ndarray[lo:hi,2] == class_names[j], j] = '1'
    # csv_as_ndarray = np.resize(csv_as_ndarray, new_shape)
def main(inimgtemplate, inbgrcsv, inshape, outimg):

    inDS = gdal.Open(inimgtemplate, gdal.GA_ReadOnly)
    gt = inDS.GetGeoTransform()
    ns = inDS.RasterXSize
    nl = inDS.RasterYSize
    print(gt)

    tabdata = np.genfromtxt(inbgrcsv,
                            dtype=[('names', '|S43'), ('blue', 'f8'),
                                   ('green', 'f8'), ('red', 'f8')],
                            delimiter=',',
                            skip_header=0)

    tabnames = np.chararray(len(tabdata), itemsize=10)
    for d in range(len(tabdata)):
        tabnames[d] = tabdata['names'][d][0:9]

    newstuff = np.zeros((2, len(tabdata)), dtype=np.int64)

    shp = ogr.Open(inshape)
    lyr = shp.GetLayer()
    numfeat = lyr.GetFeatureCount()

    ## if input image already exists, read it and update,
    ## otherwise, Create output image
    if (os.path.isfile(outimg)):
        outDS = gdal.Open(outimg, gdal.GA_Update)
        band1 = outDS.GetRasterBand(1)
        band2 = outDS.GetRasterBand(2)
        band3 = outDS.GetRasterBand(3)
        band4 = outDS.GetRasterBand(4)
        band5 = outDS.GetRasterBand(5)
        band6 = outDS.GetRasterBand(6)
        blue = band1.ReadAsArray()
        green = band2.ReadAsArray()
        red = band3.ReadAsArray()
        bluesd = band4.ReadAsArray()
        greensd = band5.ReadAsArray()
        redsd = band6.ReadAsArray()
    else:
        drv = gdal.GetDriverByName('GTiff')
        outDS = drv.Create(outimg, xsize=inDS.RasterXSize, ysize=inDS.RasterYSize, \
          bands=inDS.RasterCount * 2, eType=gdal.GDT_Float32, options=["COMPRESS=LZW"])
        outDS.SetProjection(inDS.GetProjection())
        outDS.SetGeoTransform(inDS.GetGeoTransform())
        blue = np.zeros((inDS.RasterYSize, inDS.RasterXSize), dtype=np.float32)
        green = np.zeros((inDS.RasterYSize, inDS.RasterXSize),
                         dtype=np.float32)
        red = np.zeros((inDS.RasterYSize, inDS.RasterXSize), dtype=np.float32)
        bluesd = np.zeros((inDS.RasterYSize, inDS.RasterXSize),
                          dtype=np.float32)
        greensd = np.zeros((inDS.RasterYSize, inDS.RasterXSize),
                           dtype=np.float32)
        redsd = np.zeros((inDS.RasterYSize, inDS.RasterXSize),
                         dtype=np.float32)

    ## for each point ASD feature, match the root name to the list and get
    ## Blue, Green, and Red values to insert into the pixel at its location.

    pix = np.zeros(numfeat, dtype=np.int64)
    lin = np.zeros(numfeat, dtype=np.int64)
    textit = np.chararray(numfeat, itemsize=11)

    featnames = []

    for featnum in range(numfeat):
        feat = lyr.GetNextFeature()
        featnames.append((feat.GetField("specname"))[0:9])
        geom = feat.GetGeometryRef()
        xval = geom.GetX()
        yval = geom.GetY()
        pix[featnum] = math.floor((xval - gt[0]) / gt[1])
        lin[featnum] = math.floor((yval - gt[3]) / gt[5])
        textit[featnum] = ("%05d %05d" % (pix[featnum], lin[featnum]))

    uniqrowcol, uniqind = np.unique(textit, return_index=True)

    templist = []

    for t, k in enumerate(uniqrowcol.tolist()):
        pixlin = [int(k.split()[0].decode()), int(k.split()[1].decode())]
        ## ind = np.logical_and(np.equal(pix, pixlin[0]), np.equal(lin, pixlin[1]))
        ## numvals = ind.sum()
        print(pixlin)
        set1 = np.char.equal(k, textit)
        setfeatnames = np.asarray(featnames)[set1]
        pixlistblue = []
        pixlistgreen = []
        pixlistred = []

        for thename in setfeatnames.tolist():
            for j, tabrow in enumerate(tabnames):
                if (tabrow.decode() == thename):
                    pixlistblue.append(tabdata['blue'][j])
                    pixlistgreen.append(tabdata['green'][j])
                    pixlistred.append(tabdata['red'][j])
                    break

        print(k, len(pixlistblue))
        meanvalblue = np.mean(np.asarray(pixlistblue))
        sdvalblue = np.std(np.asarray(pixlistblue))
        meanvalgreen = np.mean(np.asarray(pixlistgreen))
        sdvalgreen = np.std(np.asarray(pixlistgreen))
        meanvalred = np.mean(np.asarray(pixlistred))
        sdvalred = np.std(np.asarray(pixlistred))
        blue[pixlin[1], pixlin[0]] = meanvalblue
        green[pixlin[1], pixlin[0]] = meanvalgreen
        red[pixlin[1], pixlin[0]] = meanvalred
        bluesd[pixlin[1], pixlin[0]] = sdvalblue
        greensd[pixlin[1], pixlin[0]] = sdvalgreen
        redsd[pixlin[1], pixlin[0]] = sdvalred

    shp, lyr = None, None

    print("All point features processed")

    band1 = outDS.GetRasterBand(1)
    band1.SetNoDataValue(0.0)
    band1.WriteArray(blue)
    band2 = outDS.GetRasterBand(2)
    band2.SetNoDataValue(0.0)
    band2.WriteArray(green)
    band3 = outDS.GetRasterBand(3)
    band3.SetNoDataValue(0.0)
    band3.WriteArray(red)
    band4 = outDS.GetRasterBand(4)
    band4.SetNoDataValue(0.0)
    band4.WriteArray(bluesd)
    band5 = outDS.GetRasterBand(5)
    band5.SetNoDataValue(0.0)
    band5.WriteArray(greensd)
    band6 = outDS.GetRasterBand(6)
    band6.SetNoDataValue(0.0)
    band6.WriteArray(redsd)

    band1, band2, band3, band4, band5, band6 = None, None, None, None, None, None
    inDS, outDS = None, None
#!/usr/bin/python
import time
import sys
import numpy as np

# YOUR FUNCTIONS GO HERE -------------------------------------
# 1. Populate the scoring matrix and the backtracking matrix

seq1 = 'AC'
seq2 = 'AG'
score_matrix = np.zeros((len(seq2) + 1, len(seq1) + 1), dtype=int)
backtrack_matrix = np.chararray((len(seq2) + 1, len(seq1) + 1), unicode=True)
backtrack_matrix[:] = 'D'


def populate(seq1, seq2):

    global backtrack_matrix
    global score_matrix

    for i in range(len(seq2) + 1):
        for j in range(len(seq1) + 1):
            if i == 0 and j == 0:
                score_matrix[i][j] = 0
                backtrack_matrix[i][j] = 'E'

            elif i == 0 and j != 0:
                score_matrix[i][j] = -2 * j

                backtrack_matrix[i][j] = 'L'
#train=np.loadtxt("blend_train7.txt")[:,0:18]
#test=np.loadtxt("blend_pred7.txt")[:,0:18]
train = np.loadtxt("blend_train8_50.txt")
test = np.loadtxt("blend_pred8_50.txt")
target = pd.read_csv('./target.csv', index_col=0)
submission = pd.read_csv('./submission.csv')

est = xgb.XGBClassifier(max_depth=7,
                        learning_rate=0.023577270815059184,
                        n_estimators=189,
                        gamma=0.074789906865142142,
                        min_child_weight=3.066587035368701,
                        subsample=0.49697592546415631,
                        colsample_bytree=0.95166695052920525,
                        reg_alpha=0.20645978460961734,
                        objective='multi:softmax')

est.fit(train, target['status_group'])
pred = est.predict(test)
importances = est.booster().get_fscore()
sorted_imp = sorted(importances.items(), key=operator.itemgetter(1))

output = np.chararray(len(pred), itemsize=30)
output[pred == 0] = 'functional'
output[pred == 1] = 'functional needs repair'
output[pred == 2] = 'non functional'

submission['status_group'] = output
submission.to_csv('output.csv', index=False)

etime = float(time.time() - stime)
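
# The chararray label mapping above, in isolation: integer class ids are
# converted to fixed-width strings by boolean-mask assignment.
import numpy as np
pred_demo = np.array([0, 2, 1, 0])
labels = np.chararray(len(pred_demo), itemsize=30)
labels[pred_demo == 0] = 'functional'
labels[pred_demo == 1] = 'functional needs repair'
labels[pred_demo == 2] = 'non functional'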
# Example 43
def catalogue(refresh=None, **kwargs):
    """
	
	Read UPSCALE catalogue for JASMIN
	   >>> cat = catalogue(Experiment='present_climate', Frequency='monthly')
	   
	refresh = True: refresh the UPSCALE catalogue
	   >>> cat = catalogue(refresh=True, Experiment='future_climate', Frequency='6hourly')
	   
	"""

    ### Location of catalogue file
    cat_file = baspy_path + '/upscale_catalogue.npy'

    ### If cat_file does not exist, then set refresh=True
    if (os.path.isfile(cat_file) == False):
        print(
            "Catalogue of data files does not exist, this may be the first time you've run this code"
        )
        print("Building catalogue now... this could take a few minutes")
        refresh = True

    if (refresh == True):

        ### Get all paths for all model data
        dirs = glob.glob(upscale_dir + '/GA3/*/*/*/m??s??i???/*')
        dirs = filter(lambda f: os.path.isdir(f), dirs)

        ### Convert list to numpy array
        dirs = np.array(dirs, dtype=str)

        ### Keep only dirs which include standard UPSCALE job IDs
        GA3_job_ids = [
            'xhqij', 'xhqik', 'xhqil', 'xhqin', 'xhqio', 'xgxqo', 'xgxqp',
            'xgxqq', 'xgxqe', 'xgxqf', 'xgxqg', 'xgxqh', 'xgxqi', 'xhqir',
            'xhqis', 'xgyip', 'xgyid', 'xgyie', 'xgyif', 'xgxqk', 'xgxql',
            'xgxqm'
        ]

        keep_dirs = []
        for dir in dirs:
            for jobid in GA3_job_ids:
                if jobid in dir: keep_dirs.append(dir)
        dirs = keep_dirs

        ### setup character arrays
        GA_str = np.chararray(len(dirs), itemsize=14)
        exp_str = np.chararray(len(dirs), itemsize=16)
        res_str = np.chararray(len(dirs), itemsize=14)
        freq_str = np.chararray(len(dirs), itemsize=14)
        stash_str = np.chararray(len(dirs), itemsize=14)
        job_str = np.chararray(len(dirs), itemsize=14)

        for i in range(0, len(dirs)):
            split_str = re.split('/', dirs[i])
            GA_str[i] = split_str[4]
            exp_str[i] = split_str[5]
            res_str[i] = split_str[6]
            freq_str[i] = split_str[7]
            stash_str[i] = split_str[8]
            job_str[i] = split_str[9]

        dt = np.dtype([('GA', '|S14'), ('Experiment', '|S16'),
                       ('Resolution', '|S14'), ('Frequency', '|S14'),
                       ('Stash', '|S14'), ('JobID', '|S14')])
        a = np.zeros(len(dirs), dt)
        a['GA'] = GA_str
        a['Experiment'] = exp_str
        a['Resolution'] = res_str
        a['Frequency'] = freq_str
        a['Stash'] = stash_str
        a['JobID'] = job_str

        np.save(cat_file, a)

    ### Read catalogue
    cat = np.load(cat_file)

    ### Filter data
    names = kwargs.viewkeys()

    for name in names:

        uniq_label = np.unique(cat[name])
        cat_bool = np.zeros(len(cat), dtype=bool)

        vals = kwargs[name]

        if (vals.__class__ == str): vals = [vals]
        for val in vals:
            if (val not in uniq_label):
                raise ValueError(val + ' not found. See available: ' +
                                 np.array_str(uniq_label))
            cat_bool = np.add(cat_bool, (cat[name] == val))
        cat = cat[cat_bool]

    return cat
# Example 44
    def test_unique_1d(self):

        def check_all(a, b, i1, i2, c, dt):
            base_msg = 'check {0} failed for type {1}'

            msg = base_msg.format('values', dt)
            v = unique(a)
            assert_array_equal(v, b, msg)

            msg = base_msg.format('return_index', dt)
            v, j = unique(a, 1, 0, 0)
            assert_array_equal(v, b, msg)
            assert_array_equal(j, i1, msg)

            msg = base_msg.format('return_inverse', dt)
            v, j = unique(a, 0, 1, 0)
            assert_array_equal(v, b, msg)
            assert_array_equal(j, i2, msg)

            msg = base_msg.format('return_counts', dt)
            v, j = unique(a, 0, 0, 1)
            assert_array_equal(v, b, msg)
            assert_array_equal(j, c, msg)

            msg = base_msg.format('return_index and return_inverse', dt)
            v, j1, j2 = unique(a, 1, 1, 0)
            assert_array_equal(v, b, msg)
            assert_array_equal(j1, i1, msg)
            assert_array_equal(j2, i2, msg)

            msg = base_msg.format('return_index and return_counts', dt)
            v, j1, j2 = unique(a, 1, 0, 1)
            assert_array_equal(v, b, msg)
            assert_array_equal(j1, i1, msg)
            assert_array_equal(j2, c, msg)

            msg = base_msg.format('return_inverse and return_counts', dt)
            v, j1, j2 = unique(a, 0, 1, 1)
            assert_array_equal(v, b, msg)
            assert_array_equal(j1, i2, msg)
            assert_array_equal(j2, c, msg)

            msg = base_msg.format(('return_index, return_inverse '
                                   'and return_counts'), dt)
            v, j1, j2, j3 = unique(a, 1, 1, 1)
            assert_array_equal(v, b, msg)
            assert_array_equal(j1, i1, msg)
            assert_array_equal(j2, i2, msg)
            assert_array_equal(j3, c, msg)

        a = [5, 7, 1, 2, 1, 5, 7]*10
        b = [1, 2, 5, 7]
        i1 = [2, 3, 0, 1]
        i2 = [2, 3, 0, 1, 0, 2, 3]*10
        c = np.multiply([2, 1, 2, 2], 10)

        # test for numeric arrays
        types = []
        types.extend(np.typecodes['AllInteger'])
        types.extend(np.typecodes['AllFloat'])
        types.append('datetime64[D]')
        types.append('timedelta64[D]')
        for dt in types:
            aa = np.array(a, dt)
            bb = np.array(b, dt)
            check_all(aa, bb, i1, i2, c, dt)

        # test for object arrays
        dt = 'O'
        aa = np.empty(len(a), dt)
        aa[:] = a
        bb = np.empty(len(b), dt)
        bb[:] = b
        check_all(aa, bb, i1, i2, c, dt)

        # test for structured arrays
        dt = [('', 'i'), ('', 'i')]
        aa = np.array(list(zip(a, a)), dt)
        bb = np.array(list(zip(b, b)), dt)
        check_all(aa, bb, i1, i2, c, dt)

        # test for ticket #2799
        aa = [1. + 0.j, 1 - 1.j, 1]
        assert_array_equal(np.unique(aa), [1. - 1.j, 1. + 0.j])

        # test for ticket #4785
        a = [(1, 2), (1, 2), (2, 3)]
        unq = [1, 2, 3]
        inv = [0, 1, 0, 1, 1, 2]
        a1 = unique(a)
        assert_array_equal(a1, unq)
        a2, a2_inv = unique(a, return_inverse=True)
        assert_array_equal(a2, unq)
        assert_array_equal(a2_inv, inv)

        # test for chararrays with return_inverse (gh-5099)
        a = np.chararray(5)
        a[...] = ''
        a2, a2_inv = np.unique(a, return_inverse=True)
        assert_array_equal(a2_inv, np.zeros(5))
# Example 45
N = int(inputsize[0])
M = int(inputsize[1])
X_train = np.zeros((N, M))  #zero array of N rows, M columns
y_train = np.zeros((N)).astype(int)

toBinary = {"+1": 1, "-1": 0}
toOutput = {"1": "+1", "0": "-1"}

for row in range(N):
    line = sys.stdin.readline().rstrip().split(' ')
    y_train[row] = toBinary[line[1]]
    for col in range(M):
        X_train[row, col] = float(line[col + 2].split(':')[1])

N_test = int(sys.stdin.readline().rstrip())
names = np.chararray((N_test), itemsize=5)
X_test = np.zeros((N_test, M))  #zero array of N rows, M columns

for row in range(N_test):
    line = sys.stdin.readline().rstrip().split(' ')
    names[row] = line[0]
    for col in range(M):
        X_test[row, col] = float(line[col + 1].split(':')[1])

# Splitting the dataset into the Training set and Test set
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=random.randint(1, 100))
grad_clf = GradientBoostingClassifier(max_depth=1,
                                      n_estimators=300,
                                      learning_rate=0.3)
grad_clf.fit(X_train, y_train)
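# Illustrative continuation (not part of the original snippet): predict labels
# for the named test rows and map the 0/1 classes back to the "+1"/"-1" output
# format via toOutput. Assumes Python 3 print semantics.
predictions = grad_clf.predict(X_test)
for name, pred in zip(names, predictions):
    print(name.decode(), toOutput[str(pred)])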
starty = 0.
endy = 2500.
dy = 10.
numy = int(float(endy - starty) / dy) + 1

x = np.linspace(startx, endx, numx)
y = np.linspace(starty, endy, numy)
nx = len(x)
ny = len(y)

# Bathymetry
X, Y = np.meshgrid(x,y)
Z = np.zeros(X.shape) - 50.
depths = Z.T[:,:, np.newaxis]

sediments = np.chararray((nx,ny,1), itemsize=20)
sediments[:] = "loose sand"
   
strata = {"values": {'depth': depths,
                     'sediment': sediments},
          "coords": [x, y, ["layer 1"]]}
          
# Mannings
G = np.zeros((nx, ny)) + 0.3
geo_raw = {"values": G,
           "coords": [x, y]}

# Soil characteristics           
max_temp = 10.
max_soil_res = 10.
target_burial_depth = 10
plt.ylabel("Sepal width in cm")
plt.show()

import codecademylib3_seaborn
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets
from sklearn.cluster import KMeans
import pandas as pd
iris = datasets.load_iris()
samples = iris.data
target = iris.target
model = KMeans(n_clusters=3)
model.fit(samples)
labels = model.predict(samples)
species = np.chararray(target.shape, itemsize=150)
for i in range(len(samples)):
  if target[i] == 0:
    species[i] = 'setosa'
  elif target[i] == 1:
    species[i] = 'versicolor'
  elif target[i] == 2: 
    species[i] = 'virginica'
df = pd.DataFrame({'labels': labels, 'species': species})
print(df)
ct = pd.crosstab(df['labels'], df['species'])
print(ct)

import codecademylib3_seaborn
import matplotlib.pyplot as plt
import numpy as np
Example #48
def test_empty_indexing():
    """Regression test for ticket 1948."""
    # Check that indexing a chararray with an empty list/array returns an
    # empty chararray instead of a chararray with a single empty string in it.
    s = np.chararray((4,))
    assert_(s[[]].size == 0)
Example #49
    def _read_dataset(self, data_path, shape=None):
        """Read the dataset into memory. Note, the images are not fetched into
        memory, only their filenames.

        Args:
            data_path: Where is the relative location of the dataset.
            shape (optional): The shape of the input images.
        """
        def to_img_path(filename):
            """The image filenames from file have to be converted, if the png
            format is used.
            """
            if self.png_format_used:
                filename = os.path.splitext(filename)[0] + '.png'
            return filename

        # FIXME If we use the attributes as outputs, then this is a multi-label
        # classification task. Though, we don't capture this case in the
        # Dataset base class yet (it would destroy the current implementation
        # of one hot encodings).
        self._data['classification'] = False
        self._data['sequence'] = False
        self._data['num_classes'] = 40  # 40 different attributes.
        self._data['is_one_hot'] = False
        if shape is not None:
            assert (len(shape) == 2)
            self._data['in_shape'] = shape + [3]
        else:
            self._data['in_shape'] = [218, 178, 3]
        self._data['out_shape'] = [self._data['num_classes']]

        self._data['celeba'] = dict()

        # The annotations dict will contain the annotations of each image
        # except its attributes (i.e., the stuff we currently don't use).
        annotations = defaultdict(dict)

        ## Identity
        # Read the identities. Images with the same identity show the same
        # person.
        ident_fn = os.path.join(data_path, CelebAData._IDENTITY)
        with open(ident_fn) as f:
            ident_file = f.readlines()

        for line in ident_file:
            img_ident, ident = line.split()
            img_ident = to_img_path(img_ident)
            annotations[img_ident]['ident'] = int(ident)

        # Initialize the actual data arrays.
        num_imgs = len(annotations.keys())
        max_str_len = len(max(annotations.keys(), key=len))
        in_data = np.chararray([num_imgs, 1],
                               itemsize=max_str_len,
                               unicode=True)
        out_data = np.empty([num_imgs, self._data['num_classes']],
                            dtype=np.float32)

        ## Attributes
        # Read the list of attributes. This will become the output of this
        # dataset.
        attr_fn = os.path.join(data_path, CelebAData._ATTRIBUTES)

        with open(attr_fn) as f:
            nis = int(f.readline())
            attr_names = f.readline().split()
            attribute_lines = f.readlines()

        assert (nis == num_imgs)
        assert (len(attr_names) == self._data['num_classes'])
        self._data['celeba']['attr_names'] = attr_names

        assert (len(attribute_lines) == num_imgs)
        for i, line in enumerate(attribute_lines):
            words = line.split()
            img_ident = to_img_path(words[0])
            attrs = [int(w) > 0 for w in words[1:]]
            assert (len(attrs) == self._data['num_classes'])

            # The actual index of the sample in the dataset.
            annotations[img_ident]['index'] = i

            ### Fill input and output data.
            in_data[i, :] = img_ident
            out_data[i, :] = attrs

        self._data['in_data'] = in_data
        self._data['out_data'] = out_data

        ## Landmarks
        # Landmarks of aligned and cropped images.
        # The following landmarks are specified for each image:
        # ['lefteye', 'righteye', 'nose', 'leftmouth', 'rightmouth']
        lm_fn = os.path.join(data_path, CelebAData._LANDMARKS)

        with open(lm_fn) as f:
            nis = int(f.readline())
            lm_names_raw = f.readline().split()
            lm_lines = f.readlines()

        assert (nis == num_imgs)
        # A landmark always consists of an x and y coordinate.
        assert (len(lm_names_raw) % 2 == 0)
        assert (len(lm_lines) == num_imgs)

        lm_names = []
        for i in range(0, len(lm_names_raw), 2):
            assert (lm_names_raw[i].endswith('_x') and \
                    lm_names_raw[i + 1].endswith('_y'))
            lm_names.append(lm_names_raw[i][:-2])
        self._data['celeba']['landmark_names'] = lm_names

        for line in lm_lines:
            words = line.split()
            img_ident = to_img_path(words[0])
            locs = [int(i) for i in words[1:]]
            assert (len(locs) == len(lm_names_raw))

            lms = dict()
            for i in range(0, len(locs), 2):
                lms[lm_names[i // 2]] = (locs[i], locs[i + 1])

            annotations[img_ident]['landmarks'] = lms

        ## Partitioning
        # Load partitioning (what samples belong to train (0), test (2) and
        # val (1) set?).
        part_fn = os.path.join(data_path, CelebAData._PARTITIONS)
        with open(part_fn) as f:
            partitions = f.readlines()

        assert (len(partitions) == num_imgs)

        train_inds = []
        test_inds = []
        val_inds = []
        for i, line in enumerate(partitions):
            img_ident, partition = line.split()
            img_ident = to_img_path(img_ident)
            partition = int(partition)

            assert (i == annotations[img_ident]['index'])

            if partition == 0:
                train_inds.append(i)
            elif partition == 1:
                val_inds.append(i)
            else:
                test_inds.append(i)

        self._data['train_inds'] = np.asarray(train_inds)
        self._data['test_inds'] = np.asarray(test_inds)
        self._data['val_inds'] = np.asarray(val_inds)
        assert (len(train_inds) + len(test_inds) + len(val_inds) == num_imgs)

        self._data['celeba']['anno'] = annotations
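# A standalone sketch of the filename-storage pattern used in _read_dataset
# above (assumption: toy filenames; the chararray itemsize is set to the
# longest filename so no entry gets truncated).
import numpy as np

example_files = ['000001.png', '000002.png', '000003_cropped.png']
max_str_len = len(max(example_files, key=len))
in_data = np.chararray([len(example_files), 1], itemsize=max_str_len, unicode=True)
for i, fname in enumerate(example_files):
    in_data[i, :] = fname
print(in_data[:, 0])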
def visualize_nearest_neighbor(featuremap_dict,
                               num_neighbor=5,
                               top_number=5,
                               vis=True,
                               save_csv=False,
                               csv_save_path=None,
                               save_vis=False,
                               save_img=False,
                               save_thumb_name='nearest_neighbor.png',
                               img_src_folder=None,
                               ext_filter='.jpg',
                               nn_save_folder=None,
                               debug=True):
    '''
    visualize nearest neighbors for feature maps extracted from images

    parameters:
        featuremap_dict: a dictionary mapping an image path (key) to its feature map (value); the feature map must be a numpy array of any shape, no flattening is needed
        num_neighbor: number of neighbors to visualize; the first nearest neighbor is the feature map itself
        top_number: number of top entries to visualize; since there may be many feature maps (the length of the dictionary), only the entries with the lowest summed distance to their nearest neighbors are kept
        csv_save_path: path of the .csv file in which the indices and distance arrays for all elements are saved
        nn_save_folder: folder in which the nearest-neighbor images for the top feature maps are saved

    return:
        all_sorted_nearest_id: a 2d matrix; each row is a feature followed by its nearest neighbors in the whole feature dataset, and rows are sorted by the summed distance to their nearest neighbors
        selected_nearest_id: only the top_number rows of all_sorted_nearest_id
    '''
    print('processing feature map to nearest neighbor.......')
    if debug:
        assert isdict(featuremap_dict), 'featuremap should be dictionary'
        assert all(
            isnparray(featuremap_tmp) for featuremap_tmp in featuremap_dict.
            values()), 'value of dictionary should be numpy array'
        assert isinteger(
            num_neighbor
        ) and num_neighbor > 1, 'number of neighbors must be an integer larger than 1'
        if save_csv and csv_save_path is not None:
            assert is_path_exists_or_creatable(
                csv_save_path), 'path to save .csv file is not correct'

        if save_vis or save_img:
            if nn_save_folder is not None:  # save image directly
                assert isstring(ext_filter), 'extension filter is not correct'
                assert is_path_exists(
                    img_src_folder), 'source folder for image is not correct'
                assert all(
                    isstring(path_tmp) for path_tmp in featuremap_dict.keys()
                )  # key should be the path for the image
                assert is_path_exists_or_creatable(
                    nn_save_folder
                ), 'folder to save top visualized images is not correct'
                assert isstring(
                    save_thumb_name), 'name of thumbnail is not correct'

    if ext_filter.find('.') == -1:
        ext_filter = '.%s' % ext_filter

    # flatten the feature map
    nn_feature_dict = dict()
    for key, featuremap_tmp in featuremap_dict.items():
        nn_feature_dict[key] = featuremap_tmp.flatten()
    num_features = len(nn_feature_dict)

    # nearest neighbor
    featuremap = np.array(nn_feature_dict.values())
    nearbrs = NearestNeighbors(n_neighbors=num_neighbor,
                               algorithm='ball_tree').fit(featuremap)
    distances, indices = nearbrs.kneighbors(featuremap)

    if debug:
        assert featuremap.shape[
            0] == num_features, 'shape of feature map is not correct'
        assert indices.shape == (
            num_features, num_neighbor), 'shape of indices is not correct'
        assert distances.shape == (
            num_features, num_neighbor), 'shape of distances is not correct'

    # convert the nearest indices for all featuremap to the key accordingly
    id_list = nn_feature_dict.keys()
    max_length = len(max(
        id_list, key=len))  # find the maximum length of string in the key
    nearest_id = np.chararray(indices.shape, itemsize=max_length + 1)
    for x in range(nearest_id.shape[0]):
        for y in range(nearest_id.shape[1]):
            nearest_id[x, y] = id_list[indices[x, y]]

    if debug:
        assert list(nearest_id[:,
                               0]) == id_list, 'nearest neighbor has problem'

    # sort the feature based on distance
    print('sorting the feature based on distance')
    featuremap_distance = np.sum(distances, axis=1)
    if debug:
        assert featuremap_distance.shape == (
            num_features, ), 'distance is not correct'
    sorted_indices = np.argsort(featuremap_distance)
    all_sorted_nearest_id = nearest_id[sorted_indices, :]

    # save to the csv file
    if save_csv and csv_save_path is not None:
        print('Saving nearest neighbor result as .csv to path: %s' %
              csv_save_path)
        with open(csv_save_path, 'w+') as file:
            np.savetxt(file, distances, delimiter=',', fmt='%f')
            np.savetxt(file, all_sorted_nearest_id, delimiter=',', fmt='%s')
            file.close()

    # choose the best to visualize
    selected_sorted_indices = sorted_indices[0:top_number]
    if debug:
        for i in range(num_features - 1):
            assert featuremap_distance[
                sorted_indices[i]] < featuremap_distance[sorted_indices[
                    i + 1]], 'feature map is not well sorted based on distance'
    selected_nearest_id = nearest_id[selected_sorted_indices, :]

    if save_vis:
        fig, axarray = plt.subplots(top_number, num_neighbor)
        for index in range(top_number):
            for nearest_index in range(num_neighbor):
                img_path = os.path.join(
                    img_src_folder, '%s%s' %
                    (selected_nearest_id[index, nearest_index], ext_filter))
                if debug:
                    print('loading image from %s' % img_path)
                img = imread(img_path)
                if isgrayimage_dimension(img):
                    axarray[index, nearest_index].imshow(img, cmap='gray')
                elif iscolorimage_dimension(img):
                    axarray[index, nearest_index].imshow(img)
                else:
                    assert False, 'unknown error'
                axarray[index, nearest_index].axis('off')
        save_thumb = os.path.join(nn_save_folder, save_thumb_name)
        fig.savefig(save_thumb)
        if vis:
            plt.show()
        plt.close(fig)

    # save top visualization to the folder
    if save_img and nn_save_folder is not None:
        for top_index in range(top_number):
            file_list = selected_nearest_id[top_index]
            save_subfolder = os.path.join(nn_save_folder, file_list[0])
            mkdir_if_missing(save_subfolder)
            for file_tmp in file_list:
                file_src = os.path.join(img_src_folder,
                                        '%s%s' % (file_tmp, ext_filter))
                save_path = os.path.join(save_subfolder,
                                         '%s%s' % (file_tmp, ext_filter))
                if debug:
                    print('saving %s to %s' % (file_src, save_path))
                shutil.copyfile(file_src, save_path)

    return all_sorted_nearest_id, selected_nearest_id
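# A hedged usage sketch (assumption: random toy feature maps; the function as
# written relies on Python 2 dict.keys()/.values() returning lists, so this is
# how it would be called in that environment; visualization and saving are
# switched off so only the sorted neighbor id matrices are returned).
import numpy as np
feature_dict = {'img_%04d' % i: np.random.rand(7, 7, 8) for i in range(10)}
all_ids, top_ids = visualize_nearest_neighbor(feature_dict,
                                              num_neighbor=3,
                                              top_number=2,
                                              vis=False,
                                              save_csv=False,
                                              save_vis=False,
                                              save_img=False)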
Example #51
    def sim_reads(self):
        simLibObj = libOps.libObj(settingsfilename='libsettings.txt',
                                  output_prefix='_')
        enforced_rev_read_len = 100
        [for_read_len, rev_read_len] = simLibObj.get_min_allowed_readlens(
            simLibObj.filter_amplicon_window)
        rev_read_len = int(enforced_rev_read_len)
        '''
        simLibObj.seqform_for_params and simLibObj.seqform_rev_params are already stored in the current object's memory.
        Each of these variables is a list whose entries have the form:
            Element 1: [start_pos, end_pos]
            Element 2: np.ndarray(seq_bool_vec, dtype=np.bool_)
            Element 3: np.ndarray(capital_bool_vec, dtype=np.bool_)
            Element 4: np.ndarray(ambig_vec, dtype=np.bool_)
        An illustrative helper showing how a sequence is sampled from such an entry is given below.
        '''
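        # Illustrative helper (assumption: each param entry follows the
        # 4-element layout described in the docstring above). It samples one
        # random base per position from the 4-wide boolean mask; the loops
        # further down inline the same logic.
        def _sample_seq_from_param(param, base_order='ACGT'):
            [start_pos, end_pos] = param[0]
            seq_bool_vec = param[1]
            seq = ''
            for pos in range(end_pos - start_pos):
                allowed = np.where(seq_bool_vec[(pos * 4):((pos + 1) * 4)])[0]
                seq += base_order[allowed[np.random.randint(allowed.shape[0])]]
            return seq
        # e.g.: _sample_seq_from_param(simLibObj.seqform_for_params[0][0]['U'][0])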
        [subdirnames, filenames] = sysOps.get_directory_and_file_list()

        for_umi_seqs = list()
        rev_umi_seqs = list()
        rev_umi_amplicon_list = list()
        uei_seqs = list()
        base_order = 'ACGT'

        sysOps.throw_status('Generating simulated sequences ...')
        amplicon_list = list()
        if "-amplicon" in simLibObj.mySettings:
            amplicon_list = [
                simLibObj.mySettings["-amplicon"][i].upper().split(',')
                for i in range(len(simLibObj.mySettings["-amplicon"]))
            ]

        for for_umi_i in range(self.Nbcn):
            for_param_index = np.random.randint(
                len(simLibObj.seqform_for_params))
            if len(simLibObj.seqform_for_params[for_param_index]) > 1:
                sysOps.throw_exception(
                    'Error: len(simLibObj.seqform_for_params[for_param_index]) = '
                    + str(len(simLibObj.seqform_for_params[for_param_index])))
                sysOps.exitProgram()
            my_for_umi_param = simLibObj.seqform_for_params[for_param_index][
                0]['U'][0]
            [start_pos, end_pos] = my_for_umi_param[0]
            seq_bool_vec = my_for_umi_param[1]
            my_for_umi = str('')
            for pos in range(end_pos - start_pos):
                possible_bases = np.where(seq_bool_vec[(pos * 4):((pos + 1) *
                                                                  4)])[0]
                my_for_umi += base_order[possible_bases[np.random.randint(
                    possible_bases.shape[0])]]

            for_umi_seqs.append([int(for_param_index), str(my_for_umi)])

        for for_uei_i in range(self.Nuei):
            for_param_index = 0  # there should be no difference across UMI's
            my_for_uei_param = simLibObj.seqform_for_params[for_param_index][
                0]['U'][1]
            [start_pos, end_pos] = my_for_uei_param[0]
            seq_bool_vec = my_for_uei_param[1]
            my_for_uei = str('')
            for pos in range(end_pos - start_pos):
                possible_bases = np.where(seq_bool_vec[(pos * 4):((pos + 1) *
                                                                  4)])[0]
                my_for_uei += base_order[possible_bases[np.random.randint(
                    possible_bases.shape[0])]]

            uei_seqs.append(str(my_for_uei))

        for rev_umi_i in range(self.Ntrg):
            rev_param_index = np.random.randint(
                len(simLibObj.seqform_rev_params))
            my_rev_umi_param = simLibObj.seqform_rev_params[rev_param_index][
                0]['U'][0]
            [start_pos, end_pos] = my_rev_umi_param[0]
            seq_bool_vec = my_rev_umi_param[1]
            my_rev_umi = str('')
            for pos in range(end_pos - start_pos):
                possible_bases = np.where(seq_bool_vec[(pos * 4):((pos + 1) *
                                                                  4)])[0]
                my_rev_umi += base_order[possible_bases[np.random.randint(
                    possible_bases.shape[0])]]

            if len(amplicon_list) == 0:
                encoded_amplicon = str('')
            else:
                this_gsp_primer_amplicon_pair = list(
                    amplicon_list[np.random.randint(len(amplicon_list))]
                )  # already properly oriented
                # generate single error on amplicon
                lenamp = len(this_gsp_primer_amplicon_pair[1])
                rand_loc = np.random.randint(lenamp)
                this_gsp_primer_amplicon_pair[1] = str(
                    this_gsp_primer_amplicon_pair[1][:rand_loc] +
                    base_order[np.random.randint(4)] +
                    this_gsp_primer_amplicon_pair[1][(rand_loc + 1):])
                encoded_amplicon = ''.join(this_gsp_primer_amplicon_pair)

            tmp_umi_index = float(rev_umi_i)

            if tmp_umi_index == 0:
                encoded_amplicon += base_order[0]
            else:
                for myexponent in range(
                        int(np.floor(np.log(tmp_umi_index) / np.log(4.0))), -1,
                        -1):
                    mydigit = np.floor(tmp_umi_index /
                                       np.power(4.0, myexponent))
                    encoded_amplicon += base_order[int(mydigit)]
                    tmp_umi_index -= mydigit * np.power(4.0, myexponent)

            rev_umi_seqs.append(
                [int(rev_param_index),
                 str(my_rev_umi),
                 str(encoded_amplicon)])

        sysOps.throw_status('Writing simulated reads ...')

        for filename in filenames:
            if filename.endswith('_sim_ueifile.csv'):
                ueifile = np.int64(
                    np.loadtxt(sysOps.globaldatapath + filename,
                               delimiter=','))
                newdirname = filename[:filename.find('_')]
                read_list = list()
                for i in range(ueifile.shape[0]):
                    for myread in range(ueifile[i, 3]):
                        read_list.append(np.array([ueifile[i, :3]]))
                read_list = np.concatenate(
                    read_list, axis=0
                )  # re-write array so that there is now one row per read
                # randomly permute:
                read_list = read_list[
                    np.random.permutation(read_list.shape[0]), :]

                for_chararray = np.chararray((for_read_len))
                rev_chararray = np.chararray((rev_read_len))
                for_fastq_outfile = open(newdirname + '_for.fastq', "w")
                rev_fastq_outfile = open(newdirname + '_rev.fastq', "w")
                for i in range(read_list.shape[0]):
                    for_param_index = for_umi_seqs[read_list[i, 1]][0]
                    for_umi_seq = for_umi_seqs[read_list[i, 1]][1]
                    rev_param_index = rev_umi_seqs[read_list[i, 2]][
                        0]  # both beacon and target indices are, at this point, independently indexed from 0
                    rev_umi_seq = rev_umi_seqs[read_list[i, 2]][1]
                    rev_amp_seq = rev_umi_seqs[read_list[i, 2]][2]
                    uei_seq = uei_seqs[read_list[i, 0]]

                    for j in range(for_read_len):
                        for_chararray[j] = 'N'
                    for j in range(rev_read_len):
                        rev_chararray[j] = 'N'

                    my_for_umi_param = simLibObj.seqform_for_params[
                        for_param_index][0]['U'][0]
                    [start_pos, end_pos] = my_for_umi_param[0]
                    for j in range(end_pos - start_pos):
                        for_chararray[j + start_pos] = for_umi_seq[j]

                    my_for_uei_param = simLibObj.seqform_for_params[
                        for_param_index][0]['U'][1]
                    [start_pos, end_pos] = my_for_uei_param[0]
                    for j in range(end_pos - start_pos):
                        for_chararray[j + start_pos] = uei_seq[j]

                    for my_for_param in simLibObj.seqform_for_params[
                            for_param_index][0]['P']:
                        [start_pos, end_pos] = my_for_param[0]
                        for j in range(end_pos - start_pos):
                            for_chararray[j + start_pos] = base_order[np.where(
                                my_for_param[1][(4 * j):(4 * (j + 1))])[0][0]]

                    my_rev_umi_param = simLibObj.seqform_rev_params[
                        rev_param_index][0]['U'][0]
                    [start_pos, end_pos] = my_rev_umi_param[0]
                    for j in range(end_pos - start_pos):
                        rev_chararray[j + start_pos] = rev_umi_seq[j]
                    my_rev_amp_param = simLibObj.seqform_rev_params[
                        rev_param_index][0]['A'][0]
                    start_pos = my_rev_amp_param[0][0]
                    for j in range(len(rev_amp_seq)):
                        rev_chararray[j + start_pos] = rev_amp_seq[j]

                    if 'P' in simLibObj.seqform_rev_params[rev_param_index][0]:
                        for my_rev_param in simLibObj.seqform_rev_params[
                                rev_param_index][0]['P']:
                            [start_pos, end_pos] = my_rev_param[0]
                            for j in range(end_pos - start_pos):
                                rev_chararray[j +
                                              start_pos] = base_order[np.where(
                                                  my_rev_param[1][(4 * j):(
                                                      4 * (j + 1))])[0][0]]

                    for_record = SeqIO.SeqRecord(
                        Seq.Seq(for_chararray.tostring()))
                    for_record.id = '-' + str(i) + '-' + str(read_list[i, 1])
                    for_record.description = ''
                    for_record.letter_annotations['phred_quality'] = list(
                        [30 for j in range(for_read_len)])
                    rev_record = SeqIO.SeqRecord(
                        Seq.Seq(rev_chararray.tostring()))
                    rev_record.id = '-' + str(i) + '-' + str(read_list[i, 2])
                    rev_record.description = ''
                    rev_record.letter_annotations['phred_quality'] = list(
                        [30 for j in range(rev_read_len)])
                    SeqIO.write(for_record, for_fastq_outfile, "fastq")
                    SeqIO.write(rev_record, rev_fastq_outfile, "fastq")

                for_fastq_outfile.close()
                rev_fastq_outfile.close()
                os.mkdir(newdirname)
                with open('libsettings.txt', 'rU') as oldsettingsfile:
                    with open(newdirname + '//libsettings.txt',
                              'w') as newsettingsfile:
                        for oldsettings_row in oldsettingsfile:
                            if oldsettings_row.startswith('-source_for'):
                                newsettingsfile.write('-source_for ..//' +
                                                      newdirname +
                                                      '_for.fastq\n')
                            elif oldsettings_row.startswith('-source_rev'):
                                newsettingsfile.write('-source_rev ..//' +
                                                      newdirname +
                                                      '_rev.fastq\n')
                            else:
                                newsettingsfile.write(oldsettings_row)

        sysOps.throw_status('Done.')
        return
# trow, tcol are the row, column indices of the top-right cell
trow = 0
tcol = BOARD_COL - 1

#init the states grid

#this is to store the state value (of every cell) of the individual sweep
states = np.zeros(BOARD_ROW * BOARD_COL).reshape(BOARD_ROW, BOARD_COL)

#this is to store the cumulative value after the sweep ends
statesnew = np.zeros(BOARD_ROW * BOARD_COL).reshape(BOARD_ROW, BOARD_COL)

states[trow, tcol] = 1  # terminal state value
states[trow + 1, tcol] = -1  # terminal state value

runningpolicy = np.chararray(BOARD_ROW * BOARD_COL)
latestpolicy = np.chararray(BOARD_ROW * BOARD_COL)
'''
Note on actions
The agent can move up, down, left or right (4 possible actions).
From cells on the border of the grid one or two actions may not be possible,
as in the case of the top-right and bottom-left corners; all other edge cells allow only 3 actions.
Actions are equally likely, each with probability 0.25 (0.25 * 4 = 1, i.e. 100%);
the probabilities of all actions must sum to 100%.

Each action has a reward of -1.
A sketch of one policy-evaluation sweep under this policy follows below.
'''
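# A minimal, self-contained sketch of one policy-evaluation sweep under the
# equiprobable policy described above (assumptions: each action has probability
# 0.25, every step yields a reward of -1 as stated in the note, moves off the
# grid leave the agent in place, and terminal cells are held fixed). This is
# illustrative and not part of the original script.
def policy_evaluation_sweep(values, terminal_cells, step_reward=-1.0):
    rows, cols = values.shape
    updated = values.copy()
    moves = [(-1, 0), (1, 0), (0, -1), (0, 1)]  # up, down, left, right
    for r in range(rows):
        for c in range(cols):
            if (r, c) in terminal_cells:
                continue
            total = 0.0
            for dr, dc in moves:
                nr = min(max(r + dr, 0), rows - 1)
                nc = min(max(c + dc, 0), cols - 1)
                total += 0.25 * (step_reward + values[nr, nc])
            updated[r, c] = total
    return updated
# e.g.: statesnew = policy_evaluation_sweep(states, {(trow, tcol), (trow + 1, tcol)})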


def getreward(row, col):
    reward = -0.04
Example #53
def gen_random_data():
    new_data = np.chararray((80), unicode=True)
    for i in range(len(new_data)):
        tile_index = np.random.randint(len(tiles))
        new_data[i] = tiles[tile_index]
    return new_data
    3
)  #row starting indices in which a vertical victory is possible (rows 1-3)
win_rows = range(
    rows)  #rows in which a horizontal victory is possible (all of them)
win_cols = range(
    cols)  #columns in which a vertical victory is possible (all of them)
diag_cols = range(
    4)  #columns in which a diagonal victory is possible (columns 1-4)
diag_rows_down = range(
    3)  #rows in which a downward diagonal victory is possible (top 3 rows)
diag_rows_up = range(
    3, rows
)[::-1]  #rows in which an upward diagonal victory is possible (bottom 3 rows)

#create board as 6x7 zeros array
board = np.chararray((rows, cols))
board[:] = 0
print(board)

#play the game!
while True:

    #determine whose turn it is
    if numTurns % 2 != 0:
        player = players['player2'][0]
        playerID = players['player2'][1]
    else:
        player = players['player1'][0]
        playerID = players['player1'][1]

    #take player's input
Example #55
                      names=True,
                      dtype=None)
pgc = table['PGC']
ra = table['RAJ']  # deg
dec = table['DEJ']  # deg
d1 = table['MAJ']  # arcmin
d2 = table['MIN']  # arcmin
pa = table['PA']
ty = table['Type']
uu = table['u']
gg = table['g']
rr = table['r']
ii = table['i']
zz = table['z']

temp = np.chararray(len(pgc))

N = 60
db_root = '/home/ehsan/db_esn/data/'

mags = np.zeros([N, 5])

no = 0
for i in range(N):

    db_id = ra_db(ra[i])
    pgc_id = 'pgc' + str(pgc[i])

    filters = ['u', 'g', 'r', 'i', 'z']

    for p in range(5):
def initialize_timestep_data(bpr, weather_data):
    """
    initializes the time step data with the weather data and the minimum set of variables needed for computation.

    :param bpr: a collection of building properties for the building used for thermal loads calculation
    :type bpr: BuildingPropertiesRow
    :param weather_data: data from the .epw weather file. Each row represents an hour of the year. The columns used here are:
        ``drybulb_C``, ``wetbulb_C``, ``relhum_percent``, ``skytemp_C``, and ``windspd_ms``
    :type weather_data: pandas.DataFrame

    :return: returns the `tsd` variable, a dictionary of time step data mapping variable names to ndarrays for each hour of the year.
    :rtype: dict
    """

    # Initialize dict with weather variables
    tsd = {
        'T_ext': weather_data.drybulb_C.values,
        'T_ext_wetbulb': weather_data.wetbulb_C.values,
        'rh_ext': weather_data.relhum_percent.values,
        'T_sky': weather_data.skytemp_C.values,
        'u_wind': weather_data.windspd_ms
    }

    # fill data with nan values

    nan_fields_electricity = [
        'Eaux', 'Eaux_ve', 'Eaux_hs', 'Eaux_cs', 'Eaux_ww', 'Eaux_fw',
        'Ehs_lat_aux', 'GRID', 'GRID_a', 'GRID_l', 'GRID_v', 'GRID_data',
        'GRID_pro', 'GRID_aux', 'GRID_ww', 'GRID_hs', 'GRID_cs',
        'GRID_cdata', 'GRID_cre', 'PV', 'Eal', 'Edata', 'Epro', 'E_sys',
        'E_ww', 'E_hs', 'E_cs', 'E_cre', 'E_cdata'
    ]
    nan_fields = [
        'mcpww_sys', 'mcptw', 'mcpcre_sys', 'mcpcdata_sys', 'SOLAR_ww',
        'SOLAR_hs', 'NG_hs', 'COAL_hs', 'OIL_hs', 'WOOD_hs', 'NG_ww',
        'COAL_ww', 'OIL_ww', 'WOOD_ww', 'vfw_m3perh'
    ]
    nan_fields.extend(TSD_KEYS_HEATING_LOADS)
    nan_fields.extend(TSD_KEYS_COOLING_LOADS)
    nan_fields.extend(TSD_KEYS_HEATING_TEMP)
    nan_fields.extend(TSD_KEYS_COOLING_TEMP)
    nan_fields.extend(TSD_KEYS_COOLING_FLOWS)
    nan_fields.extend(TSD_KEYS_HEATING_FLOWS)
    nan_fields.extend(TSD_KEYS_COOLING_SUPPLY_FLOWS)
    nan_fields.extend(TSD_KEYS_COOLING_SUPPLY_TEMP)
    nan_fields.extend(TSD_KEYS_HEATING_SUPPLY_FLOWS)
    nan_fields.extend(TSD_KEYS_HEATING_SUPPLY_TEMP)
    nan_fields.extend(TSD_KEYS_RC_TEMP)
    nan_fields.extend(TSD_KEYS_MOISTURE)
    nan_fields.extend(TSD_KEYS_ENERGY_BALANCE_DASHBOARD)
    nan_fields.extend(TSD_KEYS_SOLAR)
    nan_fields.extend(TSD_KEYS_VENTILATION_FLOWS)
    nan_fields.extend(nan_fields_electricity)
    nan_fields.extend(TSD_KEYS_PEOPLE)

    tsd.update(dict((x, np.zeros(HOURS_IN_YEAR) * np.nan) for x in nan_fields))

    # initialize system status log
    tsd['sys_status_ahu'] = np.chararray(HOURS_IN_YEAR, itemsize=20)
    tsd['sys_status_aru'] = np.chararray(HOURS_IN_YEAR, itemsize=20)
    tsd['sys_status_sen'] = np.chararray(HOURS_IN_YEAR, itemsize=20)
    tsd['sys_status_ahu'][:] = 'unknown'
    tsd['sys_status_aru'][:] = 'unknown'
    tsd['sys_status_sen'][:] = 'unknown'

    return tsd
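# A minimal, self-contained sketch of the system-status-log pattern used above
# (assumption: an illustrative length of 24 hours instead of HOURS_IN_YEAR; the
# real arrays are filled in during the thermal-loads simulation).
import numpy as np

hours = 24
status_log = np.chararray(hours, itemsize=20)
status_log[:] = 'unknown'
status_log[6:18] = 'system on'  # e.g. mark the occupied hours after simulation
print(status_log[:8])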
Example #57
def re_identification(clientId, nb_time_max, name0):

    simple_message(
        clientId,
        u'Veuillez rapprocher vers la camera, ou bouger votre tête...')
    global global_vars
    global_var = (item for item in global_vars
                  if item["clientId"] == str(clientId)).next()

    global_var['text'] = ''
    global_var['text2'] = ''
    global_var['text3'] = ''

    tb_old_name = np.chararray(
        shape=(nb_time_max + 1),
        itemsize=10)  # All of the old recognition results, which are wrong
    tb_old_name[:] = ''
    tb_old_name[0] = name0

    nb_time = 0
    global_var['flag_enable_recog'] = 1
    global_var['flag_reidentify'] = 1
    global_var['flag_ask'] = 0

    while (nb_time < nb_time_max):
        time.sleep(wait_time)  # wait until after the re-identification is done

        name1 = global_var['nom']  # New result

        if np.all(tb_old_name != name1) and global_var['flag_recog']:
            print 'Essaie ' + str(nb_time +
                                  1) + ': reconnu comme ' + str(name1)

            resp = validate_recognition(clientId, str(name1))
            print resp
            if (resp == 1):
                result = 1
                name = name1
                break
            else:
                result = 0
                nb_time += 1
                tb_old_name[nb_time] = name1

        elif (not global_var['flag_recog']):
            print 'Essaie ' + str(nb_time + 1) + ': personne inconnue'
            result = 0
            nb_time += 1

    if (result == 1):  # User confirms that the recognition is correct now
        global_var['flag_enable_recog'] = 0
        # global_var['flag_reidentify']   = 0
        global_var['flag_wrong_recog'] = 0

        get_face_emotion_api_results(clientId)
        time.sleep(2)
        go_to_formation(clientId, xls_filename, name)

    else:  # Two time failed to recognized
        global_var[
            'flag_enable_recog'] = 0  # Disable recognition when two tries have failed
        # global_var['flag_reidentify']   = 0
        simple_message(
            clientId,
            u'Désolé je vous reconnaît pas, veuillez me donner votre identifiant'
        )

        name = ask_name(clientId, 0)
        if os.path.exists(
                imgPath + str(name) + ".0" + suffix
        ):  # Assume that user's face-database exists if the photo 0.png exists
            simple_message(
                clientId, 'Bonjour ' + str(name) +
                ', je vous conseille de changer vos photos')
            flag_show_photos = 1
            step_time = 1

            thread_show_photos3 = Thread(target=show_photos,
                                         args=(clientId, imgPath, name),
                                         name='thread_show_photos3_' +
                                         clientId)
            thread_show_photos3.start()

            time.sleep(0.5)
            thread_retake_validate_photos2 = Thread(
                target=retake_validate_photos,
                args=(clientId, step_time, flag_show_photos, imgPath, name),
                name='thread_retake_validate_photos2_' + clientId)
            thread_retake_validate_photos2.start()
        else:
            simple_message(
                clientId, "Malheureusement, les photos correspondant au nom " +
                str(name) +
                " n'existent pas. Je vous conseille de reprendre vos photos")

            time.sleep(1)
            global_var['flag_take_photo'] = 1  # Enable photo taking

    global_var['flag_reidentify'] = 0
Example #58
soundResp = goodQualCells.behavZscore.apply(
    lambda x: np.max(np.abs(x[~np.isnan(x)])) >= maxZThreshold)
moreRespLowFreq = soundResp & goodQualCells.behavZscore.apply(
    lambda x: abs(x[~np.isnan(x)][0]) > abs(x[~np.isnan(x)][-1]))
moreRespHighFreq = soundResp & goodQualCells.behavZscore.apply(
    lambda x: abs(x[~np.isnan(x)][-1]) > abs(x[~np.isnan(x)][0]))
goodSoundRespCells = goodQualCells[soundResp]
goodLowFreqRespCells = goodQualCells[moreRespLowFreq]
goodHighFreqRespCells = goodQualCells[moreRespHighFreq]
print '{} cells were sound responsive for both areas'.format(
    len(goodSoundRespCells))
soundRespInds = goodSoundRespCells.index
lowFreqRespInds = goodLowFreqRespCells.index
highFreqRespInds = goodHighFreqRespCells.index
aveSpikeCountByBlockAllCells = np.zeros((2, len(timeVec) - 1, len(celldb)))
brainAreaEachCell = np.chararray(len(celldb), itemsize=9)

if bestFreq:
    print(
        'Calculating for all sound responsive cells using only trials with best freq.'
    )
    for indC, cell in goodLowFreqRespCells.iterrows():
        cellObj = ephyscore.Cell(cell)
        print 'Calculating ave spike count by block for cell {}'.format(indC)
        subject = cell.subject
        date = cell.date
        depth = cell.depth
        brainArea = cell.brainArea
        brainAreaEachCell[indC] = brainArea
        evlockFile = '{0}_{1}_{2}_T{3}_c{4}_{5}.npz'.format(
            subject, date, depth, cell.tetrode, cell.cluster, alignment)
Example #59
    def match_stroke(self):
        
        dir_b = SAVE_PATH + '/' +  TYPE_2 + '/' + self.character
        if not os.path.exists(dir_b):
            print('SG: %s does not have a corresponding BK.' % self.character)
            return
        # load start_end.txt of SG stroke
        point_file_a = '%s_start_end.txt' % (self.character)    
        start_end_a = np.loadtxt(SAVE_PATH + '/' +  TYPE_1 + '/' + self.character + '/' + point_file_a, delimiter=',', dtype=np.int16)
        num_stroke_SG = int(start_end_a.shape[0] / 3)
        num_stroke_BK = int(len(os.listdir(dir_b))/4)
        match_table = np.chararray((num_stroke_SG, 2), itemsize=4, unicode=True)
        dis_table = np.zeros((num_stroke_SG, num_stroke_BK))
        dis_table[:] = 999999
        
        # print('start_end_a:')
        # print(start_end_a)
        for len_a in range(num_stroke_SG):
            match_table[len_a, 0] = len_a + 1 
            _, start_x_a, start_y_a = start_end_a[3 * len_a]
            _, mid_x_a, mid_y_a     = start_end_a[3 * len_a + 1]
            _, end_x_a, end_y_a     = start_end_a[3 * len_a + 2]

            #print('start_x_a, start_y_a:', start_x_a, start_y_a)
            #print('mid_x_a, mid_y_a:', mid_x_a, mid_y_a)
            #print('end_x_a, end_y_a', end_x_a, end_y_a)

            min_distance = 999999
            #print('################################################')
            
            for i in range(1, num_stroke_BK+1):
            
                # load start_end.txt of BK stroke
                start_end_b = np.loadtxt(dir_b + '/%s_%02d_start_end.txt' % (self.character, i), delimiter=',', dtype=np.int16)
                _, start_x_b, start_y_b = start_end_b[0]
                _, mid_x_b, mid_y_b     = start_end_b[1]
                _, end_x_b, end_y_b     = start_end_b[2]

                #print('Comparing SG: %s_%d with BK: %04d' % (self.img_name[:-4], len_a+1, i))
                #print('start_x_b, start_y_b:', start_x_b, start_y_b)
                #print('mid_x_b, mid_y_b:', mid_x_b, mid_y_b)
                #print('end_x_b, end_y_b', end_x_b, end_y_b)

                # compute two direction inner stroke
                # direction 1:
                dis_start = (start_x_a - start_x_b)**2 + (start_y_a - start_y_b)**2 
                dis_mid   = (mid_x_a - mid_x_b)**2 + (mid_y_a - mid_y_b)**2
                dis_end   = (end_x_a - end_x_b)**2 + (end_y_a - end_y_b)**2

                dis_1 = dis_start + dis_mid + dis_end
                
                # direction 2:
                dis_start = (start_x_a - end_x_b)**2 + (start_y_a - end_y_b)**2 
                dis_mid   = (mid_x_a - mid_x_b)**2 + (mid_y_a - mid_y_b)**2
                dis_end   = (end_x_a - start_x_b)**2 + (end_y_a - start_y_b)**2

                dis_2 = dis_start + dis_mid + dis_end
                
                # find the actual distance in correct direction
                dis = min(dis_1, dis_2)
                dis_table[len_a, i-1] = dis
                #print('min dis:', min_distance, 'dis:', dis)
                
                if dis < min_distance:
                    min_distance = dis
                    match_table[len_a, 1] = '%04d' % i
            #print('SG: %s_%d is matched to BK: %4s' % (self.character, len_a+1, match_table[len_a, 1]))        
            #print('################################################')
        #print(dis_table)
        match_result = np.zeros((num_stroke_SG))
        match_result[:] = -1
        print('################################################')
        print(dis_table)
        
        while -1 in match_result:
            min_dis = np.unravel_index(np.argmin(dis_table), dis_table.shape)
            
            # min_dis[0] : stroke order of SG
            # min_dis[1] : stroke order of BK
            # SG stroke is not matched
            if match_result[min_dis[0]] == -1:
                #print('################################################')
                # BK stroke is not matched
                #if (min_dis[1] + 1) not in match_result:
                match_result[min_dis[0]] = min_dis[1] + 1
                #print('matched.')
                
                #print('min:', dis_table[min_dis])
                #print('min idx: (%d, %d)' % (min_dis[0]+1, min_dis[1]+1))
                #print(dis_table)
                dis_table[min_dis] = 999999
                #print(match_result)
                #print('################################################')
                
                SG_img = Image.open('result/SG/%s/SG_%s_%02d.jpg' % (self.character, self.character, min_dis[0] + 1))
                BK_img = Image.open('result/BK/%s/BK_%s_%02d.jpg' % (self.character, self.character, min_dis[1] + 1))
                
                plt.figure(figsize=(8,4))
                plt.subplot(1, 2, 1)
                plt.title('%s : %d (瘦金體)' % (self.character, min_dis[0] + 1))
                plt.axis('off')
                plt.imshow(SG_img)
                
                plt.subplot(1, 2, 2)
                plt.title('%s : %d (標楷體)' % (self.character, min_dis[1] + 1))
                plt.imshow(BK_img)
                plt.axis('off')
                
                plt.savefig('result/match_img/%s_%02d.jpg' % (self.character, min_dis[0] + 1))
                # plt.show()
                
            # SG stroke is matched
            else:
                dis_table[min_dis] = 999999
        
        print(match_result) 
        print('################################################')

            
        np.savetxt(SAVE_PATH + '/' +  TYPE_1 + '/' + '%s_match.txt' % self.character, match_result, fmt='%d', delimiter=',')       
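# A standalone sketch of the greedy assignment loop in match_stroke above
# (assumption: a toy 3x3 distance table; the globally smallest distance is taken
# repeatedly, the SG stroke is assigned that BK stroke, and the entry is then
# masked out, mirroring the match_result bookkeeping).
import numpy as np

dis_table = np.array([[2.0, 9.0, 5.0],
                      [7.0, 1.0, 8.0],
                      [4.0, 6.0, 3.0]])
match_result = np.full(dis_table.shape[0], -1, dtype=int)
work = dis_table.copy()
while (match_result == -1).any():
    r, c = np.unravel_index(np.argmin(work), work.shape)
    if match_result[r] == -1:
        match_result[r] = c + 1  # 1-based BK stroke index, as in the code above
    work[r, c] = np.inf
print(match_result)  # e.g. [1 2 3]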
Example #60
        passband=[.009, 9999]))
#MyStruct(outid='00P+AROMAAgg',usearoma=False,n_init2drop=0,nonaggr=False,
#         noise=[],expansion=0,
#         spkreg=0,fdthr=99,dvrthr=99,addnoise=baseregressors+['AROMAAggrComp*','aroma_motion*'],passband=[.009,9999]),
#MyStruct(outid='01P+AROMAAgg',usearoma=False,n_init2drop=0,nonaggr=False,
#         noise=['GlobalSignal','global_signal'],expansion=0,
#         spkreg=0,fdthr=99,dvrthr=99,addnoise=baseregressors+['AROMAAggrComp*','aroma_motion*'],passband=[.009,9999]),
#MyStruct(outid='02P+AROMAAgg',usearoma=False,n_init2drop=0,nonaggr=False,
#         noise=['WhiteMatter','CSF','white_matter','csf'],expansion=0,
#         spkreg=0,fdthr=99,dvrthr=99,addnoise=baseregressors+['AROMAAggrComp*','aroma_motion*'],passband=[.009,9999]),
#MyStruct(outid='03P+AROMAAgg',usearoma=False,n_init2drop=0,nonaggr=False,
#         noise=['GlobalSignal','WhiteMatter','CSF','global_signal','white_matter','csf'],expansion=0,
#         spkreg=0,fdthr=99,dvrthr=99,addnoise=baseregressors+['AROMAAggrComp*','aroma_motion*'],passband=[.009,9999]) )

idlist = np.chararray((len(funcdat), len(pipelines)),
                      itemsize=len(os.path.basename(funcdat[0]).split('_')[0]),
                      unicode=True)
atlaslist = np.chararray((len(funcdat), len(pipelines)),
                         itemsize=len(atlas),
                         unicode=True)
ses = np.chararray((len(funcdat), len(pipelines)), itemsize=2, unicode=True)
task = np.chararray((len(funcdat), len(pipelines)), itemsize=5, unicode=True)
run = np.chararray((len(funcdat), len(pipelines)), itemsize=5, unicode=True)
fdthr = np.zeros((len(funcdat), len(pipelines)))
dvthr = np.zeros((len(funcdat), len(pipelines)))
ntr = np.zeros((len(funcdat), len(pipelines)))
ntrabovethr = np.zeros((len(funcdat), len(pipelines)))
pctdflost = np.zeros((len(funcdat), len(pipelines)))
mfd = np.zeros((len(funcdat), len(pipelines)))
medfd = np.zeros((len(funcdat), len(pipelines)))
maxfd = np.zeros((len(funcdat), len(pipelines)))