예제 #1
0
def readPatientFile():
    """Load the patient table stored in ``TCGA_LUAD_survival.csv``.

    The first CSV row is treated as the header (feature labels); every later
    row describes one patient.

    Returns
    -------
    list
        ``[people, demographics, survivalData, survival, peopleDict]`` where

        * ``people`` holds the first column of every patient row,
        * ``demographics`` is a one-element list containing the header row,
        * ``survivalData`` holds one NumPy array per patient row,
        * ``survival`` holds column 24 of every patient row,
        * ``peopleDict`` maps the first column of a row to that row's values
          (as a plain list).
    """
    people, demographics = [], []
    survivalData, survival = [], []
    peopleDict = {}
    with open('TCGA_LUAD_survival.csv') as survivalFile:
        reader = csv.reader(survivalFile, delimiter=',')
        for index, row in enumerate(reader):
            if index == 0:
                # Header row: keep the feature labels as-is.
                demographics.append(row)
                continue
            patient_id = row[0]
            people.append(patient_id)
            survival.append(row[24])
            fields = list(row)
            peopleDict[patient_id] = fields
            survivalData.append(np.array(fields))
    return [people, demographics, survivalData, survival, peopleDict]
def slidingwindowsegment(sequence, create_segment, compute_error, max_error, seq_range=None):
    """
    Return a list of line segments that approximate the sequence.

    The list is computed using the sliding window technique: a window is grown
    one point at a time from ``start`` until the fitted segment exceeds
    ``max_error``; the last acceptable segment is emitted and a new window is
    started at the last point that segment covered.

    Parameters
    ----------
    sequence : sequence to segment
    create_segment : a function of two arguments (sequence, sequence range) that returns a line segment that approximates the sequence data in the specified range
    compute_error: a function of two arguments (sequence, segment) that returns the error from fitting the specified line segment to the sequence data
    max_error: the maximum allowable line segment fitting error
    seq_range : optional (first index, last index) pair, both inclusive, limiting the part of the sequence that is segmented; defaults to the whole sequence

    Returns
    -------
    list of the segments produced by ``create_segment``, in order.  The list is
    empty when the range contains no points.
    """
    if not seq_range:
        seq_range = (0, len(sequence) - 1)
    start, last = seq_range[0], seq_range[1]
    if start > last:
        # Nothing to segment (e.g. an empty sequence).
        return []

    segments = []
    end = start
    # Only used as-is when the range is a single point; otherwise the first
    # loop iteration replaces it.
    result_segment = create_segment(sequence, (start, last))
    while end < last:
        end += 1
        test_segment = create_segment(sequence, (start, end))
        error = compute_error(sequence, test_segment)
        # A two-point window cannot be shortened any further, so it is always
        # accepted; this also guarantees that every restart makes progress.
        if error <= max_error or end - start == 1:
            result_segment = test_segment
            continue
        # The window grew too far: emit the last good segment and restart the
        # window at that segment's final point.  Resetting ``end`` makes the
        # next iteration fit (start, start + 1), which overwrites
        # ``result_segment`` so a stale segment is never emitted twice.
        segments.append(result_segment)
        start = end - 1
        end = start

    segments.append(result_segment)
    return segments
예제 #3
0
def convert_type(kind, value):
    """Convert the string ``value`` to the Python type described by ``kind``.

    ``kind`` is compared by exact type against the ``db`` column descriptors:
    integer kinds give ``int``, float and time kinds give ``float``, string
    kinds give ``str``.  When ``kind`` is an ``ndarray``, ``value`` is parsed
    as a ';'-separated list and the parsed numbers are passed to ``append``
    together with ``db.IntArray`` or ``db.FloatArray`` according to
    ``kind.dtype``.

    Returns ``value`` unchanged when no rule matches (including an ndarray
    whose dtype is neither ``int32`` nor ``float32``).

    Raises ``ValueError`` if ``value`` cannot be parsed as the requested
    number type.
    """
    # Exact type comparisons are kept on purpose: the db.* descriptors are
    # defined elsewhere and subclass relationships between them are unknown.
    if type(kind) == type(db.Int):
        return int(value)
    elif type(kind) == type(db.Int16):
        return int(value)
    elif type(kind) == type(db.Float):
        return float(value)
    elif type(kind) == type(db.String32):
        return str(value)
    elif type(kind) == type(db.StringN):
        return str(value)
    elif type(kind) == type(db.Time):
        return float(value)
    elif type(kind) == ndarray:
        elements = value.split(';')
        # NB: Admir's spec says there shouldn't be a trailing semi-colon, but
        # check anyway: a trailing ';' leaves an empty final element that
        # would otherwise make int()/float() raise.
        if elements[-1] == '':
            elements.pop()
        if kind.dtype == int32:
            return append(db.IntArray, [int(element) for element in elements])
        elif kind.dtype == float32:
            return append(db.FloatArray, [float(element) for element in elements])
    return value
    def A(self):
        """Return the entries of ``self.matrix`` whose magnitude exceeds ``self.tol``.

        Rows are visited in order and entries keep their order within a row.
        """
        return [
            value
            for row in self.matrix
            for value in row
            if abs(value) > self.tol
        ]
예제 #5
0
def construct_kernel_matrix(data, gamma):
    """Build the pairwise kernel matrix of ``data``.

    Entry ``(i, j)`` is ``gaussian_kernel(data[i], data[j], gamma)``.  The
    nested list of entries is wrapped with ``matrix`` before being returned.
    """
    count = len(data)
    rows = [
        [gaussian_kernel(data[i], data[j], gamma) for j in range(count)]
        for i in range(count)
    ]
    return matrix(rows)
예제 #6
0
def arrays_to_dict_array(key1, values1, key2, values2):
    """Pair up two value sequences into a list of two-key dictionaries.

    Element ``n`` of the result maps ``key1`` to ``values1[n]`` and ``key2``
    to ``values2[n]``.  Pairing stops at the end of the shorter sequence.
    """
    return [
        {key1: first, key2: second}
        for first, second in zip(values1, values2)
    ]
예제 #7
0
def readfile(filename):
    """Return the non-empty lines of a UTF-8 text file, without line endings.

    Parameters
    ----------
    filename : path of the file to read.

    Returns
    -------
    list of str, one entry per non-empty line, in file order.
    """
    lines = []
    with open(filename, encoding="utf8") as f:
        for line in f:
            # Strip only a real line terminator: the final line of a file may
            # not have one, and blindly dropping the last character would
            # corrupt it.
            if line.endswith("\n"):
                line = line[:-1]
            if line:
                lines.append(line)
    return lines
예제 #8
0
def read2array(filename, square=True):
  """
  Extract data from file and store in a 2D ndarray (or list of arrays
  if not square).  Blank or comment lines are ignored.

  Parameters:
  -----------
  filename: String
     Path to file containing the data to be read.
  square: Boolean
     If True:  assume all lines contain the same number of (white-space
               separated) values, store the data in a transposed 2D ndarray.
     If False: Store the data in a list (one list-element per line), if
               there is more than one value per line, store as 1D ndarray.

  Returns:
  --------
  array: 2D ndarray or list
     See parameters description.  When the file holds no data lines, an
     empty (0, 0) ndarray (square) or an empty list (not square) is returned.

  Modification History:
  ---------------------
  2014-04-17  patricio  Initial implementation.
  """

  # Read the file; the context manager closes it even if reading fails:
  with open(filename, "r") as f:
    stripped = [line.strip() for line in f]

  # Remove comments and empty lines:
  lines = [line for line in stripped
           if line != '' and not line.startswith('#')]
  nlines = len(lines)

  # Extract values:
  if square:
    if nlines == 0:
      # No data at all: the column count is undefined, return an empty array.
      return np.zeros((0, 0), np.double)
    ncolumns = len(lines[0].split())
    array = np.zeros((nlines, ncolumns), np.double)
    for i, line in enumerate(lines):
      # Assigning the string tokens lets numpy parse them as doubles.
      array[i] = line.split()
    array = np.transpose(array)

  else:
    array = []
    for line in lines:
      values = line.split()
      if len(values) > 1:
        array.append(np.asarray(values, np.double))
      else:
        array.append(np.double(values[0]))

  return array
예제 #9
0
def read2array(filename, square=True):
    """
    Extract data from file and store in a 2D ndarray (or list of arrays
    if not square).  Blank or comment lines are ignored.

    Parameters:
    -----------
    filename: String
       Path to file containing the data to be read.
    square: Boolean
       If True:  assume all lines contain the same number of (white-space
                 separated) values, store the data in a transposed 2D ndarray.
       If False: Store the data in a list (one list-element per line), if
                 there is more than one value per line, store as 1D ndarray.

    Returns:
    --------
    array: 2D ndarray or list
       See parameters description.  When the file holds no data lines, an
       empty (0, 0) ndarray (square) or an empty list (not square) is
       returned.

    Modification History:
    ---------------------
    2014-04-17  patricio  Initial implementation.
    """

    # Read the file and keep only data lines (no comments, no blanks).  The
    # context manager closes the file even if reading raises.
    lines = []
    with open(filename, "r") as f:
        for raw in f:
            text = raw.strip()
            if text == "" or text.startswith("#"):
                continue
            lines.append(text)

    nlines = len(lines)

    # Extract values:
    if not square:
        array = []
        for text in lines:
            values = text.split()
            if len(values) > 1:
                array.append(np.asarray(values, np.double))
            else:
                array.append(np.double(values[0]))
        return array

    if nlines == 0:
        # Without any data line the column count is undefined.
        return np.zeros((0, 0), np.double)

    ncolumns = len(lines[0].split())
    array = np.zeros((nlines, ncolumns), np.double)
    for i in range(nlines):
        # Assigning the string tokens lets numpy parse them as doubles.
        array[i] = lines[i].split()
    return np.transpose(array)
    def IA(self):
        """Return the running count of above-tolerance entries after each row.

        The list starts with 0 and gains one element per row of
        ``self.matrix``: the number of entries seen so far whose magnitude
        exceeds ``self.tol``.
        """
        offsets = [0]
        for row in self.matrix:
            in_row = sum(1 for value in row if abs(value) > self.tol)
            offsets.append(offsets[-1] + in_row)
        return offsets
    def JA(self):
        """Return the column index of every above-tolerance entry.

        Rows of ``self.matrix`` are scanned in order; an index is recorded for
        each entry whose magnitude exceeds ``self.tol``.
        """
        return [
            column
            for row in self.matrix
            for column, value in enumerate(row)
            if abs(value) > self.tol
        ]
예제 #12
0
def to_ieee_754(R):
    """Convert the 3x3 rotation matrix to nested IEEE 754 byte arrays.

    Each element ``R[i, j]`` is passed through ``floatAsByteArray``; the
    result is a list of three rows, each a list of three converted entries.
    """
    return [
        [floatAsByteArray(R[i, j]) for j in range(3)]
        for i in range(3)
    ]
예제 #13
0
파일: rotate.py 프로젝트: tlinnet/relax
def to_ieee_754(R):
    """Return the rotation matrix as full precision IEEE 754 byte arrays.

    The result is a 3x3 nested list whose entry ``[i][j]`` is
    ``floatAsByteArray(R[i, j])``.
    """
    rows = []
    for i in range(3):
        # Convert one matrix row at a time.
        rows.append([floatAsByteArray(R[i, j]) for j in range(3)])
    return rows
예제 #14
0
def construct_unit_matrix(size):
    """Return the ``size`` x ``size`` identity as a ``matrix``.

    Diagonal entries are 1.0 and all other entries are 0.0.
    """
    rows = [
        [1.0 if i == j else 0.0 for j in range(size)]
        for i in range(size)
    ]
    return matrix(rows)
def RoottoTensorflow(filepath, SvB):
    """Load one object of a ROOT file into a cached ``tf.data.Dataset``.

    The file at ``filepath`` is opened with ``uproot`` and the object stored
    under the key ``SvB`` is read with ``arrays()``.  For every entry of
    ``branches['index']`` a row is built from ``branches[item][subitem]`` for
    each key after the first one.  The rows are sliced into a dataset that is
    cached on disk under ``filepath + '_' + SvB``.
    """
    tree = uproot.open(filepath)[SvB]
    branches = tree.arrays()
    rows = [
        [branches[item][subitem] for subitem in tree.keys()[1:]]
        for item in branches['index']
    ]
    dataset = tf.data.Dataset.from_tensor_slices(rows)
    return dataset.cache(filename=filepath + '_' + SvB)
예제 #16
0
def rademacher_estimate(dataset,
                        hypothesis_generator,
                        num_samples=500,
                        random_seed=0):
    """
    Given a dataset, estimate the rademacher complexity

    The estimate is the average, over ``num_samples`` draws of random signs,
    of the best correlation any hypothesis achieves with those signs:
    R(H) = E_sig[max_h_in_H(1/m*sum_i_m(sig_i*h(x_i)))]

    Args:
      dataset: a sequence of examples that can be handled by the hypotheses
      generated by the hypothesis_generator

      hypothesis_generator: a function that generates an iterator over
      hypotheses given a dataset

      num_samples: the number of samples to use in estimating the Rademacher
      correlation

      random_seed: when non-zero, draw ``k`` uses the seed ``random_seed + k``
      for its coin tosses; when zero, no seed is passed
    """
    size = len(dataset)
    running_total = 0.0
    for trial in range(num_samples):
        if random_seed == 0:
            signs = coin_tosses(size)
        else:
            signs = coin_tosses(size, random_seed + trial)

        correlations = []
        for hypothesis in hypothesis_generator(dataset):
            weighted = 0.0
            for position in range(size):
                # Map the boolean classification onto +1 / -1.
                vote = 1 if hypothesis.classify(dataset[position]) == True else -1
                weighted += signs[position] * vote
            correlations.append(weighted / size)

        running_total += max(correlations)
    return running_total / num_samples
예제 #17
0
def rademacher_estimate(dataset, hypothesis_generator, num_samples=500,
                        random_seed=0):
    """
    Given a dataset, estimate the rademacher complexity

    Computes R(H) = E_sig[max_h_in_H(1/m*sum_i_m(sig_i*h(x_i)))], with the
    expectation replaced by an average over ``num_samples`` sign draws.

    Args:
      dataset: a sequence of examples that can be handled by the hypotheses
      generated by the hypothesis_generator

      hypothesis_generator: a function that generates an iterator over
      hypotheses given a dataset

      num_samples: the number of samples to use in estimating the Rademacher
      correlation

      random_seed: if non-zero, sample number ``k`` is drawn with the seed
      ``random_seed + k``; if zero, the coin tosses are left unseeded
    """
    m = len(dataset)

    def correlation(hypothesis, signs):
        # Average agreement between the sign vector and the +/-1 predictions.
        total = 0.0
        for idx in range(m):
            prediction = hypothesis.classify(dataset[idx])
            if prediction == True:
                total += signs[idx] * 1
            else:
                total += signs[idx] * -1
        return total / m

    accumulated = 0.0
    for sample in range(0, num_samples):
        if random_seed != 0:
            rademacher = coin_tosses(m, random_seed + sample)
        else:
            rademacher = coin_tosses(m)
        scores = [correlation(h, rademacher)
                  for h in hypothesis_generator(dataset)]
        accumulated += max(scores)
    return accumulated / num_samples
예제 #18
0
def limitGenes(geneIndices, patientData, genes):
    """Limit our gene data to selected genes.

    For every patient row only the positions listed in ``geneIndices`` are
    kept.  The value at position ``i`` is also stored as a float under the
    name ``genes[i - 1]``.

    Returns
    -------
    list
        ``[newGeneData, newGeneDict]``: a NumPy array with one row of kept
        values per patient, and a list with one name-to-float dictionary per
        patient.
    """
    kept_rows = []
    kept_maps = []
    for patient in patientData:
        values = []
        by_gene = {}
        for position, entry in enumerate(patient):
            if position not in geneIndices:
                continue
            values.append(entry)
            by_gene[genes[position - 1]] = float(entry)
        kept_rows.append(values)
        kept_maps.append(by_gene)
    return [np.array(kept_rows), kept_maps]
예제 #19
0
파일: db.py 프로젝트: olfa-lab/Voyeur
 def insert_stream(self, stream, trial_group):
     """Store one set of stream values under ``trial_group``.

     ``ndarray`` values are appended to the node named after their key,
     ``None`` values are ignored, and every other value is written into a
     single row of ``trial_group.Events``.  The row is appended once all
     items have been processed, then the table and the file are flushed.
     """
     event_row = trial_group.Events.row
     for name, item in stream.iteritems():
         if item is None:
             continue
         if type(item) == ndarray:
             self.h5file.get_node(trial_group, name).append(item)
         else:
             event_row[name] = item
     event_row.append()
     trial_group.Events.flush()
     self.h5file.flush()
def RoottoDataset(filepath, SvB):
    """Load one object of a ROOT file into a pandas DataFrame.

    The file at ``filepath`` is opened with ``uproot`` and the object stored
    under the key ``SvB`` is read with ``arrays()``.  For every entry of
    ``branches['index']`` a row is built from ``branches[item][subitem]`` for
    each key.  The columns are then given fixed names and the "Index" column
    is dropped before the frame is returned.
    """
    tree = uproot.open(filepath)[SvB]
    branches = tree.arrays()
    rows = [
        [branches[item][subitem] for subitem in tree.keys()]
        for item in branches['index']
    ]

    dataset = pd.DataFrame(rows)
    dataset.columns = [
        'Index', 'MET', "METPhi", "j1PT", "mjj", "mjj_13", "mjj_23",
        "mjjoptimized", "j1Eta", "j2Eta", "j3Eta", "j1Phi", "j2Phi", "j3Phi",
        "j2PT", "j3PT", "weight"
    ]
    dataset.drop("Index", axis=1, inplace=True)
    return dataset
예제 #21
0
def connection(sc, addr):
    """Handle one client socket: read values forever and acknowledge each.

    Every iteration receives up to 1024 bytes from ``sc``, passes the text to
    ``prender``, appends the result to the ``array`` defined outside this
    function, computes ``sacapromedio(array)`` and sends "prendido" back to
    the client.  Progress is reported with print statements.

    ``addr`` is accepted but not used.  The loop has no exit condition, so
    the function only ends when one of the calls raises.
    """

    while True:
        # Blocking read of the next message from the client.
        puerto = str(sc.recv(1024))

        print "Ah escuchado un dato"
        jj = prender(puerto)
        # ``array`` is not assigned in this function, so this mutates the
        # list from the enclosing (module) scope.
        array.append(jj)
        print array

        # Recomputed over the whole history on every message.
        averaguehour = sacapromedio(array)

        print "aca esta el averaguehour"
        print averaguehour

        #print "Start : %s" % time.ctime()
        #time.sleep( 5 )
        #print "End : %s" % time.ctime()

        print "acamesalideprender"
        # Acknowledge the message to the client.
        sc.send("prendido")
예제 #22
0
import csv


# Collect the value in column 4 of every lunch row of the meal log.  The
# first line of the file is skipped as a header.
array = []

# Plain "r" already gives universal newlines on Python 3, and the "U" flag
# is rejected from Python 3.11 on.  The context manager closes the file.
with open("kitchenmeallog.csv", "r") as load:
    for line in load.readlines()[1:]:
        splt = line.split(',')
        if splt[2] == "lunch" or splt[2] == "Lunch":
            try:
                out = float(splt[4])
            except ValueError:
                # Non-numeric entries are counted as zero.
                out = float(0)
            array.append(out)

# Real FFT of the series and its power spectrum.
c = rfft(array)
abso = abs(c)**2
c_10 = copy(c)
c_2 = copy(c)

# Cut-off indices: the first 10% and the first 2% of the coefficients.
maxc = len(c)
max10 = (maxc//10)
max2 = (maxc//100)*2

# Zero every coefficient whose index is above the cut-off.
c_10[max10 + 1:] = 0
c_2[max2 + 1:] = 0
예제 #23
0
from operator import itemgetter
from numpy import array
import numpy as np
import csv
import matplotlib.pyplot as plt
from matplotlib import pylab

def column(matrix, i):
    """Return element ``i`` of every row of ``matrix`` as a list."""
    return list(map(itemgetter(i), matrix))

# NOTE: this list shadows the ``array`` function imported from numpy above.
array = []

# ``recfromcsv`` was never imported (only ``np`` and ``array`` are), so the
# original call raised NameError.  ``np.genfromtxt`` with these options reads
# the same comma-separated file: header row as field names, per-column dtype
# detection, first four columns only.
my_data = np.genfromtxt('ETA vs cancel GC-Manila.csv', delimiter=',',
                        names=True, dtype=None, usecols=(0,1,2,3))
numrows = len(my_data)
print(numrows)

for row in range(numrows):
    array.append(my_data[row])

# Column 3 goes on the x axis, column 2 on the y axis.
x = column(array,3)
y = column(array,2)
# One random colour value per point.
colors = np.random.rand(numrows)

plt.scatter(x, y, c=colors, s=20, edgecolors='None', alpha=0.75)
plt.xlim(0,2000)
plt.ylim(0,2000)
plt.title('GC Manila:- Every point is a booking' )
plt.xlabel('Time to pax cancel (in seconds) ')
plt.ylabel('First ETA (in seconds)')
plt.show()
예제 #24
0
        DFS(r+1,c,num)
    if array[r][c-1] and not visited[r][c-1]: #Check on left
        DFS(r,c-1,num)
    if c != col-1 and array[r][c+1] and not visited[r][c+1]: #Check on right
        DFS(r,c+1,num)
    return

def printArr(arr):
    """Print every row of ``arr`` on its own line."""
    # Iterate the argument itself: reading the global ``array`` here made
    # every call print the input grid regardless of what was passed in.
    for row in arr:
        print(row)

# Load the grid: one row of whitespace-separated integers per line.  The
# context manager closes the file once it has been read.
array=[]
with open('Question 4/input_question_4','r') as f:
    for line in f:
        line = line.strip()
        array.append( [int(n) for n in line.split()] )
row = len(array)
col =  len(array[0])
print(row,col)
print("Original:")
printArr(array)
print("\n")

# Build each row separately: ``[[0]*col]*row`` would repeat one and the same
# list object, so marking a single cell would mark its whole column.
visited = [[0]*col for _ in range(row)]
print("test",array[1][0], "visit", visited[1][0])
printArr(visited)
for r in range(row):
    for c in range(col):
        print("RC{},{} val{} visit{}".format(r,c,array[r][c],visited[r][c]))
        visited[r][c] = 0
contour_number = 1
예제 #25
0

arr=[]
# Read inside a context manager so the file handle is closed once the rows
# have been collected.
with open("downloaded1.csv",'rt') as file:
    samples=csv.reader(file)

    # Remember column 1 of the second row, then stop; the reader stays
    # positioned on the third row.
    c=0
    for i in samples:
        c+=1

        if c==2:
            x=i[1]
            break

    # Keep every remaining row whose column 1 differs from that value.
    for i in samples:
        if i[1]!=x:
            arr.append(i)


df=pd.DataFrame(data=arr,columns=("types","posts"))
print(len(df.columns))
print(df)


# In[7]:


def labelencode(df):
    """Integer-encode the 'types' column of ``df`` with a new ``LabelEncoder``.

    NOTE(review): no ``return`` is visible in this excerpt, so as shown the
    encoded values are discarded; the function looks truncated - confirm
    against the full source.
    """
    data=df['types']
    # Convert the column to a NumPy array before fitting the encoder.
    values=np.array(data)
    label=LabelEncoder()
    # Fit on the values and transform them in one step.
    intencode=label.fit_transform(values)
예제 #26
0
    cost_function)

# A prediction is correct when the arg-max of the network output matches the
# arg-max of the label row; accuracy is the mean of those matches.
correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Start with an empty history: np.empty(shape=[1]) would seed it with one
# uninitialized value that is then plotted and used for the y-axis limit.
cost_history = np.empty(shape=[0], dtype=float)
y_true, y_pred = None, None
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(training_epochs):
        # One optimizer step on the training set; record the cost.
        _, cost = sess.run([optimizer, cost_function],
                           feed_dict={
                               X: tr_features,
                               Y: tr_labels
                           })
        cost_history = np.append(cost_history, cost)

    y_pred = sess.run(tf.argmax(y_, 1), feed_dict={X: ts_features})
    y_true = sess.run(tf.argmax(ts_labels, 1))
    print(
        "Test accuracy: ",
        round(sess.run(accuracy, feed_dict={
            X: ts_features,
            Y: ts_labels
        }), 3))

fig = plt.figure(figsize=(10, 8))
plt.plot(cost_history)
# np.max fails on an empty array, so fall back to 1.0 when no epoch ran.
cost_ceiling = np.max(cost_history) if cost_history.size else 1.0
plt.axis([0, training_epochs, 0, cost_ceiling])
plt.show()
예제 #27
0
                enter = cl[i]
                leave = cl[end_i]
                r = (leave - enter) / enter
                S.append(r)
    return array(S)


results = retur_pro(yyy[7:], X_tes[7:, [1]], 24)
#%% Allocation

prob = y_prob[7:]
array = []
for i in range(0, len(prob), 24):

    end_i = i + 24
    if end_i > len(prob):
        break

    if prob[i] > 0.9:
        array.append(1)
    elif prob[i] > 0.8:
        array.append(0.8)
    elif prob[i] > 0.7:
        array.append(0.6)
    elif prob[i] > 0.6:
        array.append(0.4)
    elif prob[i] > 0.5:
        array.append(0.2)
    elif prob[i] > 0.33:
        array.append(0.1)