def main():
    # path = 'ExampleData/input_files/'
    # endPath = 'ExampleData/test/'
    path, endPath = getFilePath()
    reader_fgi = list(csv.reader(open(path + "fGI_stats.csv", "rt", encoding="ascii"), delimiter=","))
    reader_core = list(csv.reader(open(path + "Core_attfGI.csv", "rt", encoding="ascii"), delimiter=","))
    genomeListing = list(open(path + "db.txt", "r"))

    genomeClusterDict = pickle.load(open(path + "genomeCluster.dict", "rb"))
    genomeLocusDict = pickle.load(open(path + "genomeLocus.dict", "rb"))
    coreDict, fgiDict = createCoreClusterDict(reader_core)

    # genome = 'E00002'
    genomeIdDict = {}
    index = 3
    for genome2 in genomeListing:
        if "\n" in genome2:
            genome2 = genome2[0:-1]
        genomeIdDict[genome2] = index
        index += 1

    for genome in genomeIdDict:
        genomeDict = createfgiInsertDict(reader_fgi, genome)
        referenceList = createfGIFeatures(
            genomeDict, coreDict, fgiDict, genomeClusterDict, genomeLocusDict, genome, genomeIdDict[genome]
        )
        writeFile(endPath, genome, referenceList)

Example #2
    def solve(self):
        filename_old = "http://py.mooctest.net:8081/dataset/population/population_old.csv"
        filename_total = "http://py.mooctest.net:8081/dataset/population/population_total.csv"
        reader_old = csv.reader(urllib.urlopen(filename_old))
        reader_total = csv.reader(urllib.urlopen(filename_total))
        count_line_old = 3
        old_num = []
        for row in reader_old:
            if count_line_old > 0:
                count_line_old -= 1
                continue
            old_num.append(int(row[1]))
        count_line_total = 5
        total_num = []
        for row in reader_total:
            if count_line_total > 0:
                count_line_total -= 1
                continue
            total_num.append(int(row[4]))
        old_rate = []
        for i in range(len(old_num)):
            old_rate.append(100.0 * old_num[i] / total_num[i])

        a = pd.Series(old_rate)
        x = a.mean()
        std = a.std()
        var = a.var() # var = s^2
        z = t.isf(0.05, 31)
        mean_lower = x - std / math.sqrt(31) * z
        mean_upper = x + std / math.sqrt(31) * z
        std_lower = 31 * var / chi2.isf(0.05, 31)
        std_upper = 31 * var / chi2.isf(0.95, 31)
        result = [[mean_lower, mean_upper], [std_lower, std_upper]]
        print result
        return result
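For reference, the interval arithmetic used above (Student's t for the mean, chi-squared for the variance) can be exercised in isolation. The sketch below uses made-up numbers and keeps the example's n - 1 = 31 convention; it is only an illustration, not part of the original solver.

import math
import numpy as np
import pandas as pd
from scipy.stats import t, chi2

# Synthetic "old-age rate" sample with n = 32 observations (hypothetical data)
rng = np.random.default_rng(0)
a = pd.Series(rng.normal(loc=12.0, scale=2.0, size=32))

n = len(a)      # 32 observations -> n - 1 = 31 degrees of freedom
x = a.mean()
std = a.std()   # sample standard deviation (ddof=1)
var = a.var()   # sample variance, var = s^2

# 90% two-sided interval for the mean: x +/- t_{0.05, n-1} * s / sqrt(n - 1)
z = t.isf(0.05, n - 1)
mean_lower = x - std / math.sqrt(n - 1) * z
mean_upper = x + std / math.sqrt(n - 1) * z

# 90% two-sided interval for the variance: (n - 1) * s^2 / chi-squared quantiles
var_lower = (n - 1) * var / chi2.isf(0.05, n - 1)
var_upper = (n - 1) * var / chi2.isf(0.95, n - 1)

print([[mean_lower, mean_upper], [var_lower, var_upper]])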
 def run(self, infile, label, outfile, existing=None):
     existing_rows = {}
     if existing:
         with open(existing, 'rU') as csvfile:
             reader = csv.reader(csvfile)
             existing_rows = dict([(x[0], x[1:]) for x in reader])
     with open(infile, 'rU') as csvfile:
         reader = csv.reader(csvfile)
         # Ignore the header
         next(reader)
         for file_name, true_val, pred_val in reader:
             if existing and file_name not in existing_rows:
                 raise Exception(
                     "The file name ({0}) doesn't exist in the existing file.".format(file_name))
             if not existing:
                 existing_rows[file_name] = ['', '']
             existing_rows[file_name] = (
                 _combine_values(existing_rows[file_name][0], true_val, label),
                 _combine_values(existing_rows[file_name][1], pred_val, label)
             )
     with open(outfile, 'w') as csvfile:
         writer = csv.writer(csvfile)
         writer.writerow(['file_name', 'true', 'predicted'])
         for k, v in existing_rows.items():
             writer.writerow((k, *v))
Example #4
 def foreignCards(self):
     self.sniff()
     # process all lines
     log = []
     cards = []
     lineNum = 0
     ignored = 0
     if self.delimiter:
         reader = csv.reader(self.data, delimiter=self.delimiter)
     else:
         reader = csv.reader(self.data, self.dialect)
     for row in reader:
         try:
             row = [unicode(x, "utf-8") for x in row]
         except UnicodeDecodeError, e:
             raise ImportFormatError(
                 type="encodingError",
                 info=_("The file was not in UTF8 format."))
         if len(row) != self.numFields:
             log.append(_(
                 "'%(row)s' had %(num1)d fields, "
                 "expected %(num2)d") % {
                 "row": u" ".join(row),
                 "num1": len(row),
                 "num2": self.numFields,
                 })
             ignored += 1
             continue
         card = self.cardFromFields(row)
         cards.append(card)
def i_check_anomaly_scores(step, check_file):
    check_file = res_filename(check_file)
    predictions_file = world.output
    try:
        predictions_file = csv.reader(open(predictions_file, "U"), lineterminator="\n")
        check_file = csv.reader(open(check_file, "U"), lineterminator="\n")
        for row in predictions_file:
            check_row = check_file.next()
            if len(check_row) != len(row):
                assert False
            for index in range(len(row)):
                dot = row[index].find(".")
                if dot > 0 or (check_row[index].find(".") > 0
                               and check_row[index].endswith(".0")):
                    try:
                        decimal_places = min(len(row[index]), len(check_row[index])) - dot - 1
                        row[index] = round(float(row[index]), decimal_places)
                        check_row[index] = round(float(check_row[index]), decimal_places)
                    except ValueError:
                        pass
                if check_row[index] != row[index]:
                    print row, check_row
                    assert False
        assert True
    except Exception, exc:
        assert False, str(exc)
Example #6
def get_fluence(e_0=100.0):
    """
    Returns a function representing the electron fluence with the distance in CSDA units.

    Args:
        e_0 (float): The kinetic energy whose CSDA range is used to scale the distances.

    Returns:
        A function representing fluence(x,u) with x in CSDA units.

    """
    # List of available energies
    e0_str_list = list(map(lambda x: (os.path.split(x)[1]).split(".csv")[
        0], glob(os.path.join(data_path, "fluence", "*.csv"))))
    e0_list = sorted(list(map(int, list(filter(str.isdigit, e0_str_list)))))

    e_closest = min(e0_list, key=lambda x: abs(x - e_0))

    with open(os.path.join(data_path, "fluence/grid.csv"), 'r') as csvfile:
        r = csv.reader(csvfile, delimiter=' ', quotechar='|',
                       quoting=csv.QUOTE_MINIMAL)
        t = next(r)
        x = np.array([float(a) for a in t[0].split(",")])
        t = next(r)
        u = np.array([float(a) for a in t[0].split(",")])
    t = []
    with open(os.path.join(data_path, "fluence", "".join([str(e_closest), ".csv"])), 'r') as csvfile:
        r = csv.reader(csvfile, delimiter=' ', quotechar='|',
                       quoting=csv.QUOTE_MINIMAL)
        for row in r:
            t.append([float(a) for a in row[0].split(",")])
    t = np.array(t)
    f = interpolate.RectBivariateSpline(x, u, t, kx=1, ky=1)
    # Note f is returning numpy 1x1 arrays
    return f
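The interpolation step above can be reproduced without the fluence data files. Below is a minimal sketch with synthetic x/u grids and fabricated values (the real grid.csv and per-energy tables are not included here):

import numpy as np
from scipy import interpolate

# Synthetic stand-ins for the grid axes and the per-energy fluence table
x = np.linspace(0.0, 1.0, 20)             # depth in CSDA units
u = np.linspace(0.0, 1.0, 15)             # scaled energy axis
t = np.outer(np.exp(-3.0 * x), 1.0 - u)   # fabricated values, shape (len(x), len(u))

f = interpolate.RectBivariateSpline(x, u, t, kx=1, ky=1)

# As the example notes, the spline returns small 2-D arrays; index to get a scalar
print(f(0.25, 0.5)[0, 0])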
Example #7
def get_cs(e_0=100, z=74):
    """
    Returns a function representing the scaled bremsstrahlung cross_section.

    Args:
        e_0 (float): The electron kinetic energy, used to scale u=e_e/e_0.
        z (int): Atomic number of the material.

    Returns:
        A function representing cross_section(e_g,u) in mb/keV, with e_g in keV.

    """
    # NOTE: Data is given for E0>1keV. CS values below this level should be used with caution.
    # The default behaviour is to keep it constant
    with open(os.path.join(data_path, "cs/grid.csv"), 'r') as csvfile:
        r = csv.reader(csvfile, delimiter=' ', quotechar='|',
                       quoting=csv.QUOTE_MINIMAL)
        t = next(r)
        e_e = np.array([float(a) for a in t[0].split(",")])
        log_e_e = np.log10(e_e)
        t = next(r)
        k = np.array([float(a) for a in t[0].split(",")])
    t = []
    with open(os.path.join(data_path, "cs/%d.csv" % z), 'r') as csvfile:
        r = csv.reader(csvfile, delimiter=' ', quotechar='|',
                       quoting=csv.QUOTE_MINIMAL)
        for row in r:
            t.append([float(a) for a in row[0].split(",")])
    t = np.array(t)
    scaled = interpolate.RectBivariateSpline(log_e_e, k, t, kx=3, ky=1)
    m_electron = 511
    z2 = z * z
    return lambda e_g, u: (u * e_0 + m_electron) ** 2 * z2 / (u * e_0 * e_g * (u * e_0 + 2 * m_electron)) * (
        scaled(np.log10(u * e_0), e_g / (u * e_0)))
Example #8
def load_csv(filename):
    """
    loads csv to a data_set
    :param filename: csv file input
        attr0,attr1,attr2,attr3,class1
        attr0,attr1,attr2,attr3,class1
        attr0,attr1,attr2,attr3,class1
        ...

    :return  data_set: a list of instances as array of attributes
        data_set = [
            ['attr0','attr1','attr2','attr3','class1'],
            ['attr0','attr1','attr2','attr3','class2'],
            ['attr0','attr1','attr2','attr3','class1'],
            ...
        ]
    """
    if isinstance(filename, file):
        lines = csv.reader(filename)
    else:
        lines = csv.reader(open(filename, "rb"))
    data_set = list(lines)
    for i in range(1, len(data_set)):
        data_set[i] = [x for x in data_set[i]]
    return data_set
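A quick round-trip showing the data_set shape the docstring describes. This sketch uses a hypothetical two-row file and Python 3 text-mode I/O (the example itself opens the file in binary mode, which is Python 2 style):

import csv

rows = [
    ['5.1', '3.5', '1.4', '0.2', 'class1'],
    ['7.0', '3.2', '4.7', '1.4', 'class2'],
]
with open('sample.csv', 'w', newline='') as f:
    csv.writer(f).writerows(rows)

with open('sample.csv', 'r', newline='') as f:
    data_set = list(csv.reader(f))

# Every attribute stays a string, exactly as in the docstring's example
print(data_set)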
Example #9
    def _list_outputs(self, test_output, expected_output):
        """
        Returns outputs from test outputs and expected outputs.  To be compared
        in the test.

        Parameters:
            - test_output: application's output name
            - expected_output: file name of the expected output
        Output:
            - test_list: the contents of the test output in a list
            - output_list: the contents of the expected output in a list
        """
        # Open the files
        test_file = open(test_output, 'r')
        expected_file = open(expected_output, 'r')

        # Create respective reader and writers
        test_reader = csv.reader(test_file)
        expected_reader = csv.reader(expected_file)

        # Listify
        test_list = list(test_reader)
        expected_list = list(expected_reader)

        # Close the files
        test_file.close()
        expected_file.close()

        return test_list, expected_list
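An equivalent sketch of the helper above written with with blocks, so both files are closed even if reading raises; the behaviour is otherwise the same:

import csv

def list_outputs(test_output, expected_output):
    # Read both CSV files into lists for comparison in the test
    with open(test_output, 'r', newline='') as test_file, \
         open(expected_output, 'r', newline='') as expected_file:
        return list(csv.reader(test_file)), list(csv.reader(expected_file))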
Example #10
def main(pre_processed, genome_sam):

	read_SJ = defaultdict(set)
	black_list = set([])

	# for row in csv.reader(open(dust), delimiter = '>'):

	# 	black_list.add(row[1])

	# for row in csv.reader(open(repbase), delimiter = '\t'):

	# 	black_list.add(row[9])

	for row in csv.reader(open(genome_sam), delimiter = '\t'):

		if row[1]=="0" or row[1]=="16":

			black_list.add(row[0])

	for row in csv.reader(open(pre_processed), delimiter = '\t'):

		read, flag, tag, start, cigar, seq, qual = row

		SJ = tag.split("|")[0]
		read_SJ[read].add(SJ)


	for row in csv.reader(open(pre_processed), delimiter = '\t'):

		read, flag, tag, start, cigar, seq, qual = row

		if (read in black_list)==False and len(read_SJ[read])==1:
			print "\t".join(row)
def main():
    '''
    Script is meant to be called from terminal command line. Look at help for more
    information on the inputs.
    '''
    
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input', help = 'Input file name ending in .csv')
    parser.add_argument('-o', '--output', help ='Output file name ending in .csv')
    parser.add_argument('-c', '--columns', action='store', nargs = '*', help = 'Column index OR title. The output columns will be in the same order as the user input.')

    args = parser.parse_args()
    
    with open(args.output,'w') as output_file:
        w = csv.writer(output_file)
    
        with open(args.input, 'r') as input_file:
            reader = csv.reader(input_file)
            header = reader.next()
            
            ind = [n if n.isdigit() else header.index(n) for n in args.columns]
            included_cols = map(int, ind)
            
            fid = csv.reader(input_file, delimiter = ',')
            
            header_new = [header[i] for i in included_cols]
            w.writerow(header_new)
            for row in fid:
                item = [row[i] for i in included_cols]
                w.writerow(item)
def dataImport():
	global testData
	global trainData

	marketingTrain = open('/Users/annu/Desktop/Deposit_Marketing/Training.csv', 'rb')
	trainData = list(csv.reader(marketingTrain, delimiter = ','))

	marketingTest = open('/Users/annu/Desktop/Deposit_Marketing/Testing.csv', 'rb')
	testData = list(csv.reader(marketingTest, delimiter = ','))

	trainData = numpy.array(trainData)
	testData = numpy.array(testData)
	
	# Integer (label) encoding of categorical data with LabelEncoder
	encoder = preprocessing.LabelEncoder()
	for j in (1,2,3,4,6,7,8,10,14,15):
		trainData[:,j] = encoder.fit_transform(trainData[:,j])
		testData[:,j] = encoder.fit_transform(testData[:,j])

	#Converting numpy strings to floats
	trainData = trainData.astype(numpy.float)
	testData = testData.astype(numpy.float)

	learnDecisionTree() #Good with handling categorical attributes
	learnRF() #Simple Ensemble classifier, must try
	learnSVM() #Tested
	learnLogReg() #Tested
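The encoding step above is plain scikit-learn LabelEncoder, which assigns one integer code per category (label encoding rather than a true one-of-K expansion). A small self-contained illustration with made-up values:

import numpy as np
from sklearn import preprocessing

# Hypothetical categorical columns (e.g. marital status, has-deposit)
data = np.array([
    ['married', 'yes'],
    ['single', 'no'],
    ['married', 'no'],
])

encoder = preprocessing.LabelEncoder()
for j in range(data.shape[1]):
    data[:, j] = encoder.fit_transform(data[:, j])

print(data.astype(float))  # categorical values replaced by integer codes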
Example #13
    def process(self, elevation=False, plot=False):
        route_reader = csv.reader(self.route_file)
        timestation_reader = csv.reader(self.timestation_file)

        for row in timestation_reader:
            if timestation_reader.line_num == 1:
                continue

            waypoint = GpsWaypoint(lat=float(row[0]), lon=float(row[1]), name=row[2])
            timestation = self.TimeStation(waypoint)
            self.timestations[timestation.number] = timestation

        timestation_index = 0
        for row in route_reader:
            if route_reader.line_num == 1:
                continue

            waypoint = GpsWaypoint(lat=float(row[0]), lon=float(row[1]), name=row[2])

            timestation_index = self.add_route_waypoint_to_timestation(waypoint, timestation_index)

        if elevation:
            for name, timestation in self.timestations.iteritems():
                timestation.download_elevation()
            if plot:
                for name, timestation in self.timestations.iteritems():
                    timestation.plot_grade()
        else:
            for name, timestation in self.timestations.iteritems():
                timestation.write_csv()
def featureMatching(input_keypoint_list, input_xy_list, input_img, template_img):
  # read the template's keypoint_binary and keypoint xy lists from CSV (the input lists are passed in)
  template_keypoint_list = []
  template_keypoint = csv.reader(open('template_keypoint_binary.csv', 'r'))
  for temp_key in template_keypoint:
    template_keypoint_list.append(map(int,temp_key))
  template_xy_list = []
  template_xy = csv.reader(open('template_keypoint_xy.csv', 'r'))
  for temp_xy in template_xy:
    template_xy_list.append(map(int,temp_xy))
  x_sum_img = len(input_img[0]) + len(template_img[0])
  sum_img = np.zeros((len(input_img), x_sum_img), np.uint8)
  sum_img = np.hstack((input_img, template_img))
  template_add_xsize = []
  # take the Hamming distance; a result of all zeros counts as a match
  for temp_y in xrange(0,len(template_keypoint_list)):
    for inp_y in xrange(0,len(input_keypoint_list)):
      calc_list = template_keypoint_list[temp_y] - input_keypoint_list[inp_y]
      #print 'template_xy' + str(template_xy_list[temp_y])
      #print 'input_xy' + str(input_xy_list[inp_y])
      if  all((x == 0 for x in calc_list)) == True:
        # map the x,y of the matched keypoint onto the image plane and output the result
        print 'get match'
        temp_xy = template_xy_list[temp_y]
        inp_xy = input_xy_list[inp_y]
        template_add_xsize = [temp_xy[1] + len(input_img[0]),temp_xy[0]]
        input_change_xy = [inp_xy[1],inp_xy[0]]
        add_input = tuple(input_change_xy)
        add_template = tuple(template_add_xsize)
        cv2.circle(sum_img,add_input,3,(0,0,0),-1)
        cv2.circle(sum_img,add_template,3,(0,0,255),-1)
        #cv2.line(sum_img,add_input,add_template,(255,255,0),1)
        #print 'add_input = ' +str(add_input) + ' add_template = ' +str(add_template)
  print 'quit calc'
  cv2.imwrite('sum_img.tif', sum_img)
Example #15
def getScore(paramter):
	extration_file = file('extration.csv')
	lable_file = file('label.csv')
	e_reader = csv.reader(extration_file)
	l_reader = csv.reader(lable_file)

	scores = []
	for w in e_reader:
		scores.append(calScore(w,paramter))
	
	npscores = np.array(scores)
	#print type(npscores[0])
	argscores = np.argsort(-npscores)

	buy = []
	i = 0
	for j in l_reader:
	#	if 1 == int(j[0]):
	#		buy.append(i)
		buy.append(j)

		i = i + 1

	count = 0
	for i in range(50000):
		#if argscores[i] in buy:
		#	count  = count + 1
		#print scores[argscores[i]]
		print buy[argscores[i]][0]+','+buy[argscores[i]][1]
		i = i+ 1
Example #16
File: main.py Project: hychina/aes
def load_dataset(category):
    
    feature_file = ""
    score_file = ""

    if category == "train":
        feature_file = "../data/features/all_train.txt"
        score_file = "../data/new_score/new_train.tsv"
    elif category == "valid":
        feature_file = "../data/features/all_valid.txt"
        score_file = "../data/new_score/new_valid.tsv"

    features = csv.reader(open(feature_file, 'r'), delimiter = '\t')
    scores = csv.reader(open(score_file, 'r'), delimiter = '\t')
    feature_names = next(features)[1:]
    n_samples = get_num_lines(score_file)
    n_features = len(feature_names)

    data = np.empty((n_samples, n_features))
    score = np.empty((n_samples,))

    for i, row in enumerate(features):
        data[i] = np.asarray(row[1:])

    for i, row in enumerate(scores):
        score[i] = row[2]

    return Bunch(data = data, 
                 score = score, 
                 n_samples = n_samples, 
                 n_features = n_features, 
                 feature_names = feature_names) 
Example #17
    def getReader(self, file, dialect=False, delimiter=None, quotechar=None, skiprows=-1):
        #determine dialect, create csv parser
        if(dialect is False):
            lines = []
            #get sample
            for line in file:
                lines.append(line)
                if(len(lines) > 500 + skiprows):
                    break

            if skiprows > 0:
                lines = lines[skiprows:]
            #sniff last 20 lines
            lines = lines[-20:]
            dialect = csv.Sniffer().sniff("\n".join(lines),delimiters=delimiter)
            dialect.doublequote=True
            dialect.skipinitialspace=True
            if not quotechar is None:
                if quotechar == '':
                    dialect.quoting = csv.QUOTE_NONE
                else:
                    dialect.quotechar = quotechar
                    dialect.quoting = csv.QUOTE_MINIMAL

            file.seek(0)
            reader = csv.reader(file, dialect)
        else:
            reader = csv.reader(file, dialect)

        return (reader, dialect)
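csv.Sniffer, which the method above uses to guess the dialect from a sample of lines, can be tried on its own. A minimal sketch with an inline semicolon-separated sample (hypothetical content):

import csv
import io

sample = 'name;age;city\nalice;30;Oslo\nbob;25;Bergen\n'

dialect = csv.Sniffer().sniff(sample)   # should detect ';' as the delimiter here
dialect.doublequote = True
dialect.skipinitialspace = True

for row in csv.reader(io.StringIO(sample), dialect):
    print(row)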
def parse_scores(input, phevor2):
    """Parses either .simple or .phevor output and returns dictonary\
    of sores by gene."""
    scores = {}
    highs = 0
    if phevor2 == False:
        with open(input) as t:
            for line in csv.reader(t, delimiter="\t"):
                if line[0] == "RANK":
                    continue
                sco = -np.log10(float(line[2]))
                scores[line[1].strip()] = sco
                if sco > highs:
                    highs = sco

    if phevor2 == True:
        with open(input) as t:
            for line in csv.reader(t, delimiter="\t"):
                if "#" in line[0]:
                    continue
                sco = float(line[2])
                scores[line[1].strip()] = sco
                if sco > highs:
                    highs = sco
    return scores, highs
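A tiny illustration of the .simple branch above (phevor2 == False), where column 2 is treated as a p-value and turned into a -log10 score. The table content is hypothetical:

import csv
import io
import numpy as np

text = 'RANK\tGENE\tPVAL\n1\tGENE_A\t0.001\n2\tGENE_B\t0.05\n'

scores, highs = {}, 0
for line in csv.reader(io.StringIO(text), delimiter='\t'):
    if line[0] == 'RANK':
        continue
    sco = -np.log10(float(line[2]))
    scores[line[1].strip()] = sco
    if sco > highs:
        highs = sco

print(scores, highs)   # GENE_A scores 3.0, which is also the highest score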
Example #19
def read_csv_data(train_name, test_name):

	Train_X = []
	Train_Y = []
	
	with open(train_name, 'r') as csvfile:
		my_reader = csv.reader(csvfile)
		for row in my_reader:
			row = [float(i) for i in row]
			Train_Y.append(row[0])
			Train_X.append(row[1:])
	
	Test_X = []
	Test_Y = []

	with open(test_name, 'r') as csvfile:
		my_reader = csv.reader(csvfile)
		for row in my_reader:
			row = [float(i) for i in row]
			Test_Y.append(row[0])
			Test_X.append(row[1:])



	return Train_X, Train_Y, Test_X, Test_Y
Example #20
def testAll(IsReferenceBuild = False, UpdateSVN = False):
    PMapFile = open(getProjectMapPath(), "rb")
    try:        
        # Validate the input.
        for I in csv.reader(PMapFile):
            if (len(I) != 2) :
                print "Error: Rows in the ProjectMapFile should have 3 entries."
                raise Exception()
            if (not ((I[1] == "0") | (I[1] == "1") | (I[1] == "2"))):
                print "Error: Second entry in the ProjectMapFile should be 0" \
                      " (single file), 1 (project), or 2(single file c++11)."
                raise Exception()              

        # When we are regenerating the reference results, we might need to 
        # update svn. Remove reference results from SVN.
        if UpdateSVN == True:
            assert(IsReferenceBuild == True);
            updateSVN("delete",  PMapFile);
            
        # Test the projects.
        PMapFile.seek(0)    
        for I in csv.reader(PMapFile):
            testProject(I[0], int(I[1]), IsReferenceBuild)

        # Add reference results to SVN.
        if UpdateSVN == True:
            updateSVN("add",  PMapFile);

    except:
        print "Error occurred. Premature termination."
        raise                            
    finally:
        PMapFile.close()    
Example #21
def main(args):
  x_bar = []

  # Compute mean
  print('\n-----------------------------')
  print('(360) Tempo di risposta del server.')
  print('-----------------------------')
  for current in range(1, BATCH + 1):
    responseFile = '360response-' + str(current) + '.csv'
    respReader = csv.reader(open('../data/' + responseFile), delimiter = ',')
    x_bar_i = 0
    n = 0
    for row in respReader:
      if row[0] != 'time':
        if float(row[0]) > 20.0 and float(row[0]) < 180.0:
          x_bar_i += float(row[1])
          n += 1
    x_bar.append(x_bar_i / n)
  mu = 0
  for x_i in x_bar:
    mu += x_i / BATCH
  print('Stima puntuale della media: ' + str(mu))

  sigma2 = 0
  for x_i in x_bar:
    sigma2 += (x_i - mu)**2
  sigma2 /= (BATCH - 1)
  print('Varianza campionaria: ' + str(sigma2))

  a = mu - 1.96 * (sqrt(sigma2) / sqrt(BATCH))
  b = mu + 1.96 * (sqrt(sigma2) / sqrt(BATCH))
  print('Intervallo di confidenza: (' + str(a) + ', ' + str(b) + ')')

  # Compute mean for the presence factor
  print('\n-----------------------------')
  print('(360) Presence factor.')
  print('-----------------------------')
  x_bar = []  # reset the batch means before computing the second statistic
  for current in range(1, BATCH + 1):
    responseFile = '360presence-' + str(current) + '.csv'
    respReader = csv.reader(open('../data/' + responseFile), delimiter = ',')
    x_bar_i = 0
    n = 0
    for row in respReader:
      if row[0] != 'time':
        x_bar_i += float(row[1])
        n += 1
    x_bar.append(x_bar_i / n)
  mu = 0
  for x_i in x_bar:
    mu += x_i / BATCH
  print('Stima puntuale della media: ' + str(mu))

  sigma2 = 0
  for x_i in x_bar:
    sigma2 += (x_i - mu)**2 / (BATCH - 1)
  print('Varianza campionaria: ' + str(sigma2))

  a = mu - 1.96 * (sqrt(sigma2) / sqrt(BATCH))
  b = mu + 1.96 * (sqrt(sigma2) / sqrt(BATCH))
  print('Intervallo di confidenza: (' + str(a) + ', ' + str(b) + ')')
Example #22
def combineAll(initialFile, timeFile, outputFile):
    with open(initialFile, "rb") as initials, open(timeFile, "rb") as times, open(outputFile, "wb") as output:
        initialReader = csv.reader(initials, delimiter=",")
        timeReader = csv.reader(times, delimiter=",")
        writer = csv.writer(output, lineterminator="\n")

        for line1, line2 in izip(initialReader, timeReader):
            id1 = line1[0]
            id2 = line2[0]
            # print id1,id2

            while not id1 == id2:
                if id1 > id2:
                    line2 = timeReader.next()

                elif id2 > id1:
                    line1 = initialReader.next()

                id1 = line1[0]
                id2 = line2[0]

                # print id1,id2
            row = line2 + line1[1 : len(line1)]
            # print row

            writer.writerow(row)
Example #23
def merge_csv(first_csv_path, second_csv_path, result_csv_path, delimiter="|"):
    """Сливает два csv-файла с данными в один.
    У всех файлов должен быть один тип представления: obj | param | count
    """
    res = {}

    first_csv = open(first_csv_path, "r")
    reader = csv.reader(first_csv, delimiter=delimiter)
    for row in reader:
        key = (row[0], row[1])
        res.setdefault(key, 0)
        res[key] += int(row[2])
    first_csv.close()

    second_csv = open(second_csv_path, "r")
    reader = csv.reader(second_csv, delimiter=delimiter)
    for row in reader:
        key = (row[0], row[1])
        res.setdefault(key, 0)
        res[key] += int(row[2])
    second_csv.close()

    result_csv = open(result_csv_path, "w")
    writer = csv.writer(result_csv, delimiter=delimiter)
    for key, value in res.items():
        writer.writerow([key[0], key[1], value])
    result_csv.close()
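A usage sketch for merge_csv, assuming the function above is in scope; the file names and contents are hypothetical:

# Two small obj|param|count files whose counts should be summed per (obj, param) key
with open('a.csv', 'w', newline='') as f:
    f.write('cpu|load|3\ncpu|temp|1\n')
with open('b.csv', 'w', newline='') as f:
    f.write('cpu|load|2\ndisk|usage|5\n')

merge_csv('a.csv', 'b.csv', 'merged.csv')

with open('merged.csv') as f:
    print(f.read())   # the (cpu, load) rows are merged into a single row with count 5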
Example #24
def main():
    """main function.
    Args:
        parsers (inst): Parser class instance for terminal input.
    Returns:
        None
    Examples:
        >>> main()
        >>> The URL you\'ve submitted is INVALID, enter VALID URL.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument('-u', '--file', help='Enter URL Link to CSV File')
    parser.add_argument('-c', '--servers', help='Enter number of Servers')

    args = parser.parse_args()

    try:
        if args.file:
            grab_file = urllib2.urlopen(args.file)
            read_file = csv.reader(grab_file)
            for row in read_file:
                simulate_one_server(int(row[0]), int(row[2]))
        elif args.servers:
            grab_file = urllib2.urlopen('http://s3.amazonaws.com/cuny-is211-spring2015/requests.csv')
            read_file = csv.reader(grab_file)
            simulate_many_servers(read_file, args.servers)
        else:
            print 'Invalid attempt, not a server, not a url'

    except urllib2.URLError as url_err:
        print 'The URL you\'ve submitted is INVALID, enter VALID URL'
        raise url_err
def main(opt):

    # Look for more than one value for each attribute
    keep = [] # this holds column indices that have more than one value
    with open(opt.inputFile, 'rU') as fin:
        fin = csv.reader(fin, delimiter='\t')
        row = fin.next()  # the header
        last = fin.next()  # this holds the last value of each attribute
        #for i, row in enumerate(fin.__iter__()):
        for i, row in enumerate(fin.__iter__()):
            for j, val in enumerate(row):
                if j not in keep and row[j] != last[j]:
                    keep.append(j)

    print 'Writing', len(keep), 'out of', len(last), 'attributes which had more than one value.'
        
    # Write out the columns we want to keep
    with open(opt.inputFile, 'rU') as fin:
        fin = csv.reader(fin, delimiter='\t')
        with open(opt.outputFile, 'w') as fout:
            fout = csv.writer(fout, delimiter='\t')
            for i, row in enumerate(fin.__iter__()):
                orow = []
                for j in keep:
                    orow.append(row[j])
                fout.writerow(orow)

    return 0
Example #26
    def __init__(self, fpath, newnames=None, delimiter=None, transpose=False):
        f = open(fpath, "rb")
        if delimiter is None:
            dialect = csv.Sniffer().sniff(f.read(1024))
#            dialect = csv.Sniffer().sniff(f.read(1024), ',:|\t')
            f.seek(0)
            data_stream = csv.reader(f, dialect)
        else:
            data_stream = csv.reader(f, delimiter=delimiter)
        if transpose:
            transposed = transpose_table(list(data_stream))
            data_stream = iter(transposed)
        else:
            transposed = None
        self.fpath = fpath
        if newnames is not None:
            #TODO: move this junk out of the class
            basename = os.path.splitext(os.path.basename(fpath))[0]
            for k in newnames:
                m = self.eval_re.match(newnames[k])
                if m:
                    eval_str = m.group(1)
                    newnames[k] = eval_with_template(eval_str,
                                                     {'basename': basename})
        self.newnames = newnames
        self.transposed = transposed
        self.f = f
        self.data_stream = data_stream
        self._fields = None
        self._field_names = None
        self._numlines = None
Example #27
def main():
    # load csv of x1,x2 values
    with open('traj.dat', 'rb') as f:
        reader = csv.reader(f, delimiter=',')
        row_count = sum(1 for row in reader)
        print(row_count)

    # initialize arrays with the number of rows
    x1all = numpy.zeros((1, row_count))
    x2all = numpy.zeros((1, row_count))
    with open('traj.dat', 'rb') as f:
        reader = csv.reader(f, delimiter=',')
        idx = 0
        for row in reader:
            print(row)
            x1all[0, idx] = float(row[0])
            x2all[0, idx] = float(row[1])
            # print("x1: " + x1all[0,idx])  # " x2:" + x2)
            idx = idx + 1
    print(x1all[0, 1])

    for i in range(len(x1all)):
        # compute I
        x1 = x1all[0, i]
        x2 = x2all[0, i]
        gainx = numpy.array([[0], [0]])
        I = computecurrent(x1, x2, gainx)
        print(I)
Example #28
def combineTimes(acceptedTimeFile, earliestTimeFile, outputFile):
    with open(acceptedTimeFile, "rb") as aF, open(earliestTimeFile, "rb") as eF, open(outputFile, "wb") as output:
        acceptedReader = csv.reader(aF, delimiter=",")
        earliestReader = csv.reader(eF, delimiter=",")
        writer = csv.writer(output, lineterminator="\n")

        for line1, line2 in izip(acceptedReader, earliestReader):
            id1 = line1[0]
            id2 = line2[0]
            # print id1,id2

            while not id1 == id2:

                if id1 > id2:
                    line2 = earliestReader.next()

                elif id2 > id1:
                    line1 = acceptedReader.next()

                id1 = line1[0]
                id2 = line2[0]
                # print id1,id2
                # print id1

            accepted = line1[1]
            earliest = line2[1]

            writer.writerow([id1, accepted, earliest])
Example #29
 def updateDelimiter(self):
     self.dialect = None
     if not self.delimiter:
         try:
             self.dialect = csv.Sniffer().sniff("\n".join(self.data[:10]))
         except:
             try:
                 self.dialect = csv.Sniffer().sniff(self.data[0])
             except:
                 pass
     if self.dialect:
         reader = csv.reader(self.data, self.dialect)
     else:
         if not self.delimiter:
             if "\t" in self.data[0]:
                 self.delimiter = "\t"
             elif ";" in self.data[0]:
                 self.delimiter = ";"
             elif "," in self.data[0]:
                 self.delimiter = ","
             else:
                 self.delimiter = " "
         reader = csv.reader(self.data, delimiter=self.delimiter)
     try:
         self.numFields = len(reader.next())
     except:
         raise ImportFormatError(
             type="encodingError",
             info=_("File is not encoded in UTF-8"))
    def test_build(self):
        edgepath = '/Users/erickpeirson/Downloads/Topic-Affinity-Propagation/edge.txt'
        distpath = '/Users/erickpeirson/Downloads/Topic-Affinity-Propagation/distribution.txt'

        graph = nx.Graph()

        # Load edge data into Graph.
        with open(edgepath, 'r') as f:
            reader = csv.reader(f, delimiter=' ')
            for line in reader:
                try:
                    graph.add_edge(int(line[1]), int(line[2]), weight=float(line[3]))
                except:
                    pass
        authors = { n:n for n in graph.nodes() }

        # Load dist data into atheta.
        atheta = {}
        with open(distpath, 'r') as f:
            reader = csv.reader(f, delimiter=' ')
            i = 0
            for line in reader:
                data = line[1:-1]
                if len(data) > 0:
                    if i in graph.nodes():
                        atheta[i] = np.array([ float(d) for d in data ])
                    i += 1


        # Estimate params.
        tapmodel = TAPModel(graph, atheta)
        tapmodel.build()
import csv 
fields = []
rows = []
with open('dname.csv', 'r') as t1, open('fname.csv', 'r') as t2:
    fileone = t1.readlines()
    filetwo = t2.readlines()
csvreader = csv.reader(fileone)
csvreader2 = csv.reader(filetwo)
# fields = next(csvreader)
array = []
array2 = []
for row in csvreader:
    rows.append(row)
    array.append(int(row[10]))
nt = []
with open('done.csv', 'w') as csvfile: 
    # creating a csv writer object 
    csvwriter = csv.writer(csvfile) 
f = open("demo1.txt", "a")

c = 0
for ro in csvreader2:
    c = c + 1
    tag = ro[10].replace('="', '')
    tag = tag.replace('"', '')
    tag = int(tag)
    if tag not in array:
        print(tag)
        f.write(str(tag) + "\n")
        nt.append(tag)
print(len(nt))
Example #32
# -*- coding: utf-8 -*-
"""
Created on Sat Feb  6 17:51:23 2021

@author: athir
"""
import csv
import random

edited_data = []

with open('speech_blocks_objects.txt') as speechblocks:
    speechblocks_objects = speechblocks.read().splitlines() 

with open('train100k.txt') as training_data:
    data_reader = csv.reader(training_data, delimiter='\t')

    for line in data_reader:
        if line[0] == 'CapableOf':
            new_line = []
            new_line.append(line[0])
            new_line.append(line[1])
            
            max_scene_length = random.randint(0, 5)
            
            object_list = []
            for i in speechblocks_objects:
                if i + ' ' in line[2] or line[2].endswith(i):
                    object_list.append(i)
            while(len(object_list) < max_scene_length):
                random_number = random.randint(0, 733)
Example #33
def main(method):

    with open('pitch-type.csv', 'r') as f:
        reader = csv.reader(f)
        for type in reader:
            implementDP(type=type, method=method)
Example #34
import cv2
import numpy as np
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from keras.models import Sequential
from keras.layers import Flatten, Dense, Lambda, Cropping2D
from keras.layers import Convolution2D
from keras import backend as K

# Load the training data taken from the simulator in Training Mode
samples = []
with open('./Data/driving_log.csv') as csvfile:
    reader = csv.reader(csvfile)
    for line in reader:
        samples.append(line)

# Split the data in training and validation sets
train_samples, validation_samples = train_test_split(samples, test_size=0.35)

# Define correction factor
correction = 0.1


# Define generator with an extra argument to distinguish between training and prediction mode
def generator(samples, batch_size=32, mode="prediction"):
    # In case of training feed center, left and right camera images to the model
    if mode == "training":
        img_per_row = 3
Example #35
def train_model(test_on, n_epochs, loss_train, patience, select_arti, corpus_to_train_on, batch_norma, filter_type,
                to_plot, lr, delta_test, config, speakers_to_train_on = "", speakers_to_valid_on = "", relearn = False):
    """
    :param test_on: (str) name of the speaker we test on; the speakers and the corpus they come from can be seen in
    "fonction_utiles.py", in the function "get_speakers_per_corpus".

    :param n_epochs: (int) max number of epochs for the training. We use an early stopping criterion to stop the training,
    so usually we don't go through all n_epochs and the early stopping happens before the 30th epoch (1 epoch means we
    have trained over ALL the data in the training set).

    :param loss_train: (int) alpha in the combined loss; can be anything between 0 and 100.
    The loss is the combined loss alpha*rmse/1000 + (1-alpha)*pearson.

    :param patience: (int) the number of successive epochs with an increasing validation loss before stopping the training.
    We usually set it to 5. The more data we have, the smaller it can be (I think).

    :param select_arti: (bool) always true; whether to use the trick of training only on the available articulatory trajectories,
    fixing the predicted trajectory (to zero) so that its gradient is 0.

    :param corpus_to_train_on: (list) list of the corpora to train on. Usually at least the corpus the test speaker comes from
    (the test speaker is removed from the training speakers by default).

    :param batch_norma: (bool) whether or not to add a batch norm layer after the LSTM layers (maybe better to add them after the
    feedforward layers?).

    :param filter_type: (int) either 0, 1 or 2. 0: the filter is outside of the network; 1: it is inside and the weights are fixed
    during the training; 2: the weights get adjusted during the training.

    :param to_plot: (bool) if true, the trajectories of one random test sentence are saved in "images_predictions".

    :param lr: initial learning rate, usually 0.001.

    :param delta_test: frequency of validation evaluation, 1 seems good.

    :param config: either "spe", "dep", or "indep", for specific (train only on the test speaker), dependent (train on the test speaker
    and others), or independent (train only on other speakers).

    :return: [rmse, pearson]. rmse is the list of the 18 rmse values (1 per articulator), same for pearson.
    """
    f_loss_train = open('training_loss.csv', 'w')
    f_loss_valid = open('valid_loss.csv', 'w')
    corpus_to_train_on = corpus_to_train_on[1:-1].split(",")
    speakers_to_train_on = speakers_to_train_on[1:-1].replace("'", "").replace('"', '').replace(' ', '').split(",")
    if speakers_to_train_on == [""] or speakers_to_train_on == []:
        train_on = which_speakers_to_train_on(corpus_to_train_on, test_on, config)
    else:
        train_on = speakers_to_train_on

    speakers_to_valid_on = speakers_to_valid_on[1:-1].replace("'", "").replace('"', '').replace(' ', '').split(",")
    if speakers_to_valid_on == [""] or speakers_to_valid_on == []:
        valid_on = []
    else:
        valid_on = speakers_to_valid_on
    print('train', train_on)
    print('valid', valid_on)
    print('test', test_on)
    name_corpus_concat = ""
    if config != "spec" : # if spec DOESNT train on other speakers
        for corpus in corpus_to_train_on:
            name_corpus_concat = name_corpus_concat + corpus + "_"

    name_file = test_on+"_"+config+"_"+name_corpus_concat+"loss_"+str(loss_train)+"_filter_"+\
                str(filter_type)+"_bn_"+str(batch_norma)

    if not os.path.exists("saved_models"):
        os.mkdir("saved_models")

    previous_models = os.listdir("saved_models")
    previous_models_2 = [x[:len(name_file)] for x in previous_models if x.endswith(".txt")]
    n_previous_same = previous_models_2.count(name_file)  # how many times our model was trained

    if n_previous_same > 0:
        print("this models has alread be trained {} times".format(n_previous_same))
    else :
        print("first time for this model")
    name_file = name_file + "_" + str(n_previous_same)  # each model trained only once ,
    # this script doesnt continue a previous training if it was ended ie if there is a .txt
    print("going to train the model with name",name_file)

    cuda_avail = torch.cuda.is_available()
    print(" cuda ?", cuda_avail)
    if cuda_avail:
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    hidden_dim = 300
    input_dim = 429
    batch_size = 10
    output_dim = 18
    early_stopping = EarlyStopping(name_file, patience=patience, verbose=True)
    model = my_ac2art_model(hidden_dim=hidden_dim, input_dim=input_dim, name_file=name_file, output_dim=output_dim,
                            batch_size=batch_size, cuda_avail=cuda_avail,
                            filter_type=filter_type, batch_norma=batch_norma)
    model = model.double()
    file_weights = os.path.join("saved_models", name_file +".pt")
    if cuda_avail:
        model = model.to(device=device)
    if relearn:
        load_old_model = True
        if load_old_model:
            if os.path.exists(file_weights):
                print("previous model did not finish learning")
                loaded_state = torch.load(file_weights,map_location=device)
                model.load_state_dict(loaded_state)
                model_dict = model.state_dict()
                loaded_state = {k: v for k, v in loaded_state.items() if
                                k in model_dict}  # only layers param that are in our current model
                loaded_state = {k: v for k, v in loaded_state.items() if
                                loaded_state[k].shape == model_dict[k].shape}  # only if layers have correct shapes
                model_dict.update(loaded_state)
                model.load_state_dict(model_dict)



    files_per_categ, files_for_test = give_me_train_valid_test_filenames(train_on=train_on,test_on=test_on,config=config,batch_size= batch_size, valid_on=valid_on)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    categs_to_consider = files_per_categ.keys()
    with open('categ_of_speakers.json', 'r') as fp:
        categ_of_speakers = json.load(fp)  # dict that gives for each category the speakers in it and the available arti
    plot_filtre_chaque_epochs = False

    for epoch in range(n_epochs):
        weights = model.lowpass.weight.data[0, 0, :].cpu()
        if plot_filtre_chaque_epochs :
            plot_filtre(weights)
        n_this_epoch = 0
        random.shuffle(list(categs_to_consider))
        loss_train_this_epoch = 0
        loss_pearson = 0
        loss_rmse = 0
        for categ in categs_to_consider:
            files_this_categ_courant = files_per_categ[categ]["train"]
            random.shuffle(files_this_categ_courant)
            while len(files_this_categ_courant) > 0: # go through all  the files batch by batch
                n_this_epoch+=1
                x, y = load_np_ema_and_mfcc(files_this_categ_courant[:batch_size])

                files_this_categ_courant = files_this_categ_courant[batch_size:]  # we are going to train on these 10 files
                x, y = model.prepare_batch(x, y)
                if cuda_avail:
                    x, y = x.to(device=model.device), y.to( device=model.device)
                y_pred = model(x).double()
                if cuda_avail:
                    y_pred = y_pred.to(device=device)
                y = y.double ()
                optimizer.zero_grad()
                if select_arti:
                    arti_to_consider = categ_of_speakers[categ]["arti"]  # list of 18 0/1 flags indicating which articulators to consider
                    idx_to_ignore = [i for i, n in enumerate(arti_to_consider) if n == "0"]
                    y_pred[:, :, idx_to_ignore] = 0 #the grad associated to this value will be zero  : CHECK THAT
                   # y_pred[:,:,idx_to_ignore].detach()
                    #y[:,:,idx_to_ignore].requires_grad = False

                loss = criterion_both(y, y_pred,alpha=loss_train, cuda_avail = cuda_avail, device=device)
                loss.backward()
                optimizer.step()

                # computation to have evolution of the losses
                loss_2 = criterion_pearson(y, y_pred, cuda_avail = cuda_avail, device=device)
                loss_pearson += loss_2.item()
                loss_3 = torch.nn.MSELoss(reduction='sum')(y, y_pred)
                loss_rmse += loss_3.item()
                torch.cuda.empty_cache()
                loss_train_this_epoch += loss.item()

        torch.cuda.empty_cache()

        loss_train_this_epoch = loss_train_this_epoch/n_this_epoch
        print("Training loss for epoch", epoch, ': ', loss_train_this_epoch)
        f_loss_train.write(str(epoch) + ',' + str(loss_train_this_epoch) + ',' + str(loss_pearson/n_this_epoch/batch_size/18.*(-1.)) + ',' + str(loss_rmse/n_this_epoch/batch_size) + '\n')
        if epoch%delta_test == 0:  # every delta_test epochs we evaluate the model on the validation set and save it if the score is better
            loss_vali = 0
            n_valid = 0
            loss_pearson = 0
            loss_rmse = 0
            for categ in categs_to_consider:  # from A to F for now
                files_this_categ_courant = files_per_categ[categ]["valid"]  # we have not yet trained on these during this epoch
                while len(files_this_categ_courant) >0 :
                    n_valid +=1
                    x, y = load_np_ema_and_mfcc(files_this_categ_courant[:batch_size])
                    files_this_categ_courant = files_this_categ_courant[batch_size:]  # these 10 sentences have been processed
                    x, y = model.prepare_batch(x, y)
                    if cuda_avail:
                        x, y = x.to(device=model.device), y.to(device=model.device)
                    y_pred = model(x).double()
                    torch.cuda.empty_cache()
                    if cuda_avail:
                        y_pred = y_pred.to(device=device)
                    y = y.double()  # (Batchsize, maxL, 18)
                    if select_arti:
                        arti_to_consider = categ_of_speakers[categ]["arti"]  # list of 18 0/1 flags indicating which articulators to consider
                        idx_to_ignore = [i for i, n in enumerate(arti_to_consider) if n == "0"]
                        y_pred[:, :, idx_to_ignore] = 0
                    #    y_pred[:, :, idx_to_ignore].detach()
                   #     y[:, :, idx_to_ignore].requires_grad = False
                    loss_courant = criterion_both(y, y_pred, loss_train, cuda_avail = cuda_avail, device=device)
                    loss_vali += loss_courant.item()
                    # to follow both losses
                    loss_2 = criterion_pearson(y, y_pred, cuda_avail = cuda_avail, device=device)
                    loss_pearson += loss_2.item()
                    loss_3 = torch.nn.MSELoss(reduction='sum')(y, y_pred)
                    loss_rmse += loss_3.item()

            loss_vali  = loss_vali/n_valid
            f_loss_valid.write(str(epoch) + ',' + str(loss_vali) + ',' +  str(loss_pearson/n_valid/batch_size/18.*(-1.)) + ',' + str(loss_rmse/n_this_epoch/batch_size) + '\n')
        torch.cuda.empty_cache()
        model.all_validation_loss.append(loss_vali)
        model.all_training_loss.append(loss_train_this_epoch)
        early_stopping(loss_vali, model)
        if early_stopping.early_stop:
            print("Early stopping, n epochs : ", model.epoch_ref + epoch)
            break

        if epoch > 0:  # halve the learning rate as soon as we overfit a little with respect to the validation set
            if loss_vali > model.all_validation_loss[-1]:
                for param_group in optimizer.param_groups:
                    param_group['lr'] = param_group['lr'] / 2
                    (param_group["lr"])


    if n_epochs > 0:
        model.epoch_ref = model.epoch_ref + epoch  # check whether this really works in the rare cases where a training is resumed
        model.load_state_dict(torch.load(os.path.join("saved_models",name_file+'.pt')))
        torch.save(model.state_dict(), os.path.join( "saved_models",name_file+".txt"))  # once the .txt exists ==> training finished!
    random.shuffle(files_for_test)
    x, y = load_np_ema_and_mfcc(files_for_test)
    print("evaluation on speaker {}".format(test_on))
    std_speaker = np.load(os.path.join(root_folder,"Preprocessing","norm_values","std_ema_"+test_on+".npy"))
    arti_per_speaker = os.path.join(root_folder, "Preprocessing", "articulators_per_speaker.csv")
    csv.register_dialect('myDialect', delimiter=';')
    with open(arti_per_speaker, 'r') as csvFile:
        reader = csv.reader(csvFile, dialect="myDialect")
        next(reader)
        for row in reader:
            if row[0] == test_on:
                arti_to_consider = row[1:19]
                arti_to_consider = [int(x) for x in arti_to_consider]

    rmse_per_arti_mean, pearson_per_arti_mean = model.evaluate_on_test(x, y, std_speaker = std_speaker, to_plot=to_plot
                                                                       , to_consider = arti_to_consider)


    """  RESULTS ON VALIDATION SET """

    pearson_valid = np.zeros((1,output_dim))
    for categ in categs_to_consider:  # from A to F for now
        files_this_categ_courant = files_per_categ[categ]["valid"]  # we have not yet trained on these during this epoch
        while len(files_this_categ_courant) > 0:
            x, y = load_np_ema_and_mfcc(files_this_categ_courant[:batch_size])
            files_this_categ_courant = files_this_categ_courant[batch_size:]  # these 10 sentences have been processed
            arti_to_consider = categ_of_speakers[categ]["arti"]  # list of 18 0/1 flags indicating which articulators to consider

            rien, pearson_valid_temp = model.evaluate_on_test(x,y,std_speaker=1, to_plot=to_plot,
                                                                 to_consider=arti_to_consider,verbose=False)
            pearson_valid_temp = np.reshape(np.array(pearson_valid_temp),(1,output_dim))
            pearson_valid = np.concatenate((pearson_valid,pearson_valid_temp),axis=0)
    pearson_valid = pearson_valid[1:,:]
    pearson_valid[np.isnan(pearson_valid)] = 0
    pearson_valid = np.mean(pearson_valid,axis=0)
    print("on validation set :mean :\n",pearson_valid)
    print("training done for : ",name_file)

    articulators = ['tt_x', 'tt_y', 'td_x', 'td_y', 'tb_x', 'tb_y', 'li_x', 'li_y',
                    'ul_x', 'ul_y', 'll_x', 'll_y', 'la', 'lp', 'ttcl', 'tbcl', 'v_x', 'v_y']
    if not os.path.exists('model_results.csv'):
        with open('model_results.csv', 'a',newline = "") as f:
            writer = csv.writer(f)
            header = ["name file", "test on", "configuration", "train on (if not spec)", "loss",
                      "n_epochs", "evaluation with...", "average"] + articulators
            writer.writerow(header)

    # write result in csv
    with open('model_results.csv', 'a',newline = "") as f:
        writer = csv.writer(f)
        row_details = [name_file,test_on,config,name_corpus_concat,loss_train,model.epoch_ref]
        row_rmse = row_details + ["rmse_on_test", np.mean(rmse_per_arti_mean[rmse_per_arti_mean!=0])] +\
                   rmse_per_arti_mean.tolist()

        row_pearson = row_details + ["pearson_on_test", np.mean(pearson_per_arti_mean[pearson_per_arti_mean!=0])]+\
                      pearson_per_arti_mean.tolist()

        row_pearson_val = row_details + ["pearson_on_valid", np.mean(pearson_valid[pearson_valid !=0])] + \
                      pearson_valid.tolist()

        writer.writerow(row_rmse)
        writer.writerow(row_pearson)
        writer.writerow(row_pearson_val)

    weight_apres = model.lowpass.weight.data[0, 0, :].cpu()
    plot_allure_filtre = True
    if plot_allure_filtre:
        plot_filtre(weight_apres)

    return rmse_per_arti_mean, pearson_per_arti_mean
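The per-speaker articulator lookup near the end relies on csv.register_dialect. In isolation that mechanism looks like the sketch below; the file content is hypothetical and the real articulators_per_speaker.csv is not shown here:

import csv
import io

csv.register_dialect('myDialect', delimiter=';')

content = 'speaker;tt_x;tt_y\nspeaker_A;1;0\n'
reader = csv.reader(io.StringIO(content), dialect='myDialect')
next(reader)                      # skip the header row
for row in reader:
    print(row[0], [int(x) for x in row[1:]])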
Example #36
def format_time(start, end):
    hours, rem = divmod(end - start, 3600)
    minutes, seconds = divmod(rem, 60)
    execut_time = "{:0>2}:{:0>2}:{:05.2f}".format(int(hours), int(minutes),
                                                  seconds)
    return execut_time


if __name__ == '__main__':
    with open(INPUT_FILE,
              encoding="utf8") as input, open(OUTPUT_FILE, 'w',
                                              newline='') as output:
        writer = csv.writer(output)
        writer.writerow(['videoId', 'text_density'])
        start = time.time()
        print(f'Start at {time.strftime("%H:%M")}')

        count = 0
        for row in csv.reader(input):
            if row[0] != 'videoId':
                videoId = row[0].replace('/watch?v=', '')
                t = get_text_density(videoId)
                print(f'[{videoId}] Text density: ', t)
                writer.writerow([videoId, t])
                output.flush()
            count += 1

        end = time.time()
        print(f'End at {time.strftime("%H:%M")}')
        print('Done in', format_time(start, end))
Example #37
    def handle(self, *args, **options):
        print('========== Заполнение базы данных ==========')

        print('====== Заполнение таблицы Section ==========')
        with open('content/1_Section.csv', 'r', encoding='utf-8') as f:
            reader = csv.reader(f, delimiter='|')
            for row in reader:
                Section.objects.create(name=row[0])
                print(row[0])
        print('=============================================')
        print()
        print('====== Заполнение таблицы Division ==========')
        with open('content/2_Division.csv', 'r', encoding='utf-8') as f:
            reader = csv.reader(f, delimiter='|')
            for row in reader:
                Division.objects.create(name=row[0], number=int(row[1]))
                print(row[0], row[1])
        print('=============================================')
        print()
        print('====== Заполнение таблицы Party =============')
        with open('content/3_Party.csv', 'r', encoding='utf-8') as f:
            reader = csv.reader(f, delimiter='|')
            for row in reader:
                Party.objects.create(name=row[0],
                                     propose_name=row[1],
                                     member_name=row[2])
                print(row[0])
        print('=============================================')
        print()
        print('====== Заполнение таблицы Roles =============')
        with open('content/4_Roles.csv', 'r', encoding='utf-8') as f:
            reader = csv.reader(f, delimiter='|')
            for row in reader:
                Roles.objects.create(name=row[0])
                print(row[0])
        print('=============================================')
        print()
        print('====== Заполнение таблицы Commissions =============')
        with open('content/5_Commissions.csv', 'r', encoding='utf-8') as f:
            reader = csv.reader(f, delimiter='|')
            for row in reader:
                Commissions.objects.create(name=row[0], description=row[1])
                print(row[0])
        print('=============================================')
        print()
        print('====== Заполнение таблицы Address =============')
        with open('content/6_Address.csv', 'r', encoding='utf-8') as f:
            reader = csv.reader(f, delimiter='|')
            for row in reader:
                Address.objects.create(name=row[0],
                                       full_address=row[1],
                                       address=row[2])
                print(row[0])
        print('=============================================')

        print()
        print('====== Заполнение таблицы Deputy =============')
        with open('content/7_Deputy.csv', 'r', encoding='utf-8') as f:
            reader = csv.reader(f, delimiter='|')
            for i, row in enumerate(reader):
                if i > 0:  # the first row contains the column headers - skip it
                    party = Party.objects.get(name=row[13])
                    division = Division.objects.get(number=row[14])
                    if len(row[3]) > 0:
                        Deputy.objects.create(surname=row[0],
                                              name=row[1],
                                              second_name=row[2],
                                              date_of_birth=datetime.strptime(
                                                  row[3], "%Y-%m-%d").date(),
                                              address=row[4],
                                              site=row[5],
                                              telephone=row[6],
                                              email=row[7],
                                              skype=row[8],
                                              telegram=row[9],
                                              is_head=row[10],
                                              is_party=row[11],
                                              is_man=row[12],
                                              party_propose=party,
                                              division=division)
                    else:
                        Deputy.objects.create(surname=row[0],
                                              name=row[1],
                                              second_name=row[2],
                                              address=row[4],
                                              site=row[5],
                                              telephone=row[6],
                                              email=row[7],
                                              skype=row[8],
                                              telegram=row[9],
                                              is_head=row[10],
                                              is_party=row[11],
                                              is_man=row[12],
                                              party_propose=party,
                                              division=division)
                    print(row[0], row[1], row[2], row[3])

        # surname | name | second_name | date_of_birth | address | site | telephone | email | skype | telegram |
        # is_head | is_party | is_man | party_propose | division
        print('=============================================')

        print()
        print('=== Populating the Members table ====')
        with open('content/8_Members.csv', 'r', encoding='utf-8') as f:
            reader = csv.reader(f, delimiter='|')
            for row in reader:
                deputy = Deputy.objects.get(surname=row[0])
                commissions = Commissions.objects.get(name=row[1])
                role = Roles.objects.get(name=row[2])

                Members.objects.create(deputy=deputy,
                                       role=role,
                                       commissions=commissions)
                print(row[0], row[1], row[2])
        print('=============================================')

        print()
        print('=== Populating the ReceptionSchedule table ====')
        with open('content/9_Schedule.csv', 'r', encoding='utf-8') as f:
            reader = csv.reader(f, delimiter='|')
            for row in reader:
                deputy = Deputy.objects.get(surname=row[0])
                address = Address.objects.get(name=row[4])
                start = datetime.strptime(row[2], "%H:%M").time()
                end = datetime.strptime(row[3], "%H:%M").time()
                ReceptionSchedule.objects.create(date=datetime.strptime(
                    row[1], "%Y-%m-%d").date(),
                                                 start_time=start,
                                                 end_time=end,
                                                 deputy=deputy,
                                                 address=address)
                print(row[0], row[1], row[2], row[3], row[4])
        print('=============================================')

        print()
        print('=== Populating the Assistants table ====')
        with open('content/10_Assistants.csv', 'r', encoding='utf-8') as f:
            reader = csv.reader(f, delimiter='|')
            for row in reader:
                deputy = Deputy.objects.get(surname=row[0])
                Assistants.objects.create(surname=row[1],
                                          name=row[2],
                                          second_name=row[3],
                                          deputy=deputy)
                print(row[0], row[1], row[2], row[3])
        print('=============================================')
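A hedged aside on the loader above: re-running the command as written inserts duplicate rows. Django's get_or_create makes this kind of seeding idempotent; below is a minimal sketch for the first two tables, assuming the same models and CSV layout used above.

# Sketch only, not part of the original command; Section and Division are the
# models used by the loader above.
import csv

with open('content/1_Section.csv', 'r', encoding='utf-8') as f:
    for row in csv.reader(f, delimiter='|'):
        Section.objects.get_or_create(name=row[0])

with open('content/2_Division.csv', 'r', encoding='utf-8') as f:
    for row in csv.reader(f, delimiter='|'):
        Division.objects.get_or_create(name=row[0],
                                       defaults={'number': int(row[1])})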
Example #38
0
def textual_position(path):
    data = []
    reader = csv.reader(open(path))
    for row in reader:
        data.append(row)
    geo_features_list = []
    for i in range(len(data)):
        for j in range(len(data[i])):
            vec = re.split(r'[,()]', data[i][j])
            tuple_new = (int(vec[1]), int(vec[2]))
            data[i][j] = tuple_new

    textual_position_list = []
    for d in data:
        textual_position = []
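        # The index ranges below (0-17, 17-22, 22-27, 27-36, 36-48, 48-68) appear to
        # follow the standard 68-point facial landmark layout (jaw, eyebrows, nose,
        # eyes, mouth), though this is only an inference from the ranges themselves.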
        for i in range(0, 17):
            m1 = d[i]
            m2 = (d[i][0] - 2, d[i][1] + 2)
            m3 = (d[i][0] - 2, d[i][1] + 1)
            m4 = (d[i][0] - 2, d[i][1])
            m5 = (d[i][0] - 1, d[i][1] - 2)
            m6 = (d[i][0], d[i][1] - 2)
            m7 = (d[i][0] + 1, d[i][1] - 2)
            m8 = (d[i][0] + 2, d[i][1])
            m9 = (d[i][0] + 2, d[i][1] + 1)
            m10 = (d[i][0] + 2, d[i][1] + 1)
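            # Note: m9 and m10 use the same offset (+2, +1); possibly one of them
            # was meant to be a different neighbour such as (+2, +2).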
            textual_position.append(m1)
            textual_position.append(m2)
            textual_position.append(m3)
            textual_position.append(m4)
            textual_position.append(m5)
            textual_position.append(m6)
            textual_position.append(m7)
            textual_position.append(m8)
            textual_position.append(m9)
            textual_position.append(m10)

        for i in range(17, 22):
            m1 = d[i]
            m2 = (d[i][0] - 2, d[i][1] - 1)
            m3 = (d[i][0] + 1, d[i][1])
            m4 = (d[i][0] + 2, d[i][1] - 1)

            textual_position.append(m1)
            textual_position.append(m2)
            textual_position.append(m3)
            textual_position.append(m4)

        for i in range(22, 27):
            m1 = d[i]
            m2 = (d[i][0] - 2, d[i][1] - 1)
            m3 = (d[i][0] - 1, d[i][1])
            m4 = (d[i][0] + 2, d[i][1] - 1)

            textual_position.append(m1)
            textual_position.append(m2)
            textual_position.append(m3)
            textual_position.append(m4)

        for i in range(27, 36):
            m1 = d[i]
            m2 = (d[i][0], d[i][1] + 1)
            m3 = (d[i][0], d[i][1] + 2)
            m4 = (d[i][0] - 2, d[i][1])
            m5 = (d[i][0] - 1, d[i][1] - 1)
            m6 = (d[i][0], d[i][1] - 1)
            m7 = (d[i][0] + 1, d[i][1] - 1)
            m8 = (d[i][0] + 2, d[i][1])

            textual_position.append(m1)
            textual_position.append(m2)
            textual_position.append(m3)
            textual_position.append(m4)
            textual_position.append(m5)
            textual_position.append(m6)
            textual_position.append(m7)
            textual_position.append(m8)

        for i in range(36, 48):
            m1 = d[i]
            m2 = (d[i][0] - 2, d[i][1])
            m3 = (d[i][0] - 2, d[i][1] + 1)
            m4 = (d[i][0], d[i][1] + 1)
            m5 = (d[i][0] + 1, d[i][1] + 1)
            m6 = (d[i][0] + 2, d[i][1])
            m7 = (d[i][0] + 1, d[i][1] - 1)
            m8 = (d[i][0], d[i][1] - 1)

            textual_position.append(m1)
            textual_position.append(m2)
            textual_position.append(m3)
            textual_position.append(m4)
            textual_position.append(m5)
            textual_position.append(m6)
            textual_position.append(m7)
            textual_position.append(m8)

        for i in range(48, 68):
            m1 = d[i]
            m2 = (d[i][0] - 2, d[i][1])
            m3 = (d[i][0] - 1, d[i][1] + 1)
            m4 = (d[i][0], d[i][1] + 1)
            m5 = (d[i][0] + 1, d[i][1] + 1)
            m6 = (d[i][0] + 2, d[i][1])
            m7 = (d[i][0] + 1, d[i][1] - 1)
            m8 = (d[i][0], d[i][1] - 1)
            m9 = (d[i][0] - 1, d[i][1] - 1)
            m10 = (d[i][0] - 1, d[i][1] + 2)
            m11 = (d[i][0], d[i][1] + 2)
            m12 = (d[i][0] + 1, d[i][1] + 2)
            m13 = (d[i][0] + 1, d[i][1] - 2)
            m14 = (d[i][0], d[i][1] - 2)
            m15 = (d[i][0] - 1, d[i][1] - 2)

            textual_position.append(m1)
            textual_position.append(m2)
            textual_position.append(m3)
            textual_position.append(m4)
            textual_position.append(m5)
            textual_position.append(m6)
            textual_position.append(m7)
            textual_position.append(m8)
            textual_position.append(m9)
            textual_position.append(m10)
            textual_position.append(m11)
            textual_position.append(m12)
            textual_position.append(m13)
            textual_position.append(m14)
            textual_position.append(m15)
        textual_position_list.append(textual_position)
    for i in range(len(textual_position_list)):
        for j in range(len(textual_position_list[i])):
            if textual_position_list[i][j][0] < 0:
                textual_position_list[i][j] = (0,
                                               textual_position_list[i][j][1])
            if textual_position_list[i][j][0] >= 480:
                textual_position_list[i][j] = (479,
                                               textual_position_list[i][j][1])
            if textual_position_list[i][j][1] < 0:
                textual_position_list[i][j] = (textual_position_list[i][j][0],
                                               0)
            if textual_position_list[i][j][1] >= 680:
                textual_position_list[i][j] = (textual_position_list[i][j][0],
                                               679)

    return textual_position_list
Example #39
0
def convert_csv_import(cr,
                       module,
                       fname,
                       csvcontent,
                       idref=None,
                       mode='init',
                       noupdate=False):
    '''Import csv file :
        quote: "
        delimiter: ,
        encoding: utf-8'''
    if not idref:
        idref = {}
    model = ('.'.join(fname.split('.')[:-1]).split('-'))[0]
    #remove folder path from model
    head, model = os.path.split(model)

    pool = pooler.get_pool(cr.dbname)

    input = cStringIO.StringIO(csvcontent)  #FIXME
    reader = csv.reader(input, quotechar='"', delimiter=',')
    fields = reader.next()
    fname_partial = ""
    if config.get('import_partial'):
        fname_partial = module + '/' + fname
        if not os.path.isfile(config.get('import_partial')):
            pickle.dump({}, file(config.get('import_partial'), 'w+'))
        else:
            data = pickle.load(file(config.get('import_partial')))
            if fname_partial in data:
                if not data[fname_partial]:
                    return
                else:
                    for i in range(data[fname_partial]):
                        reader.next()

    if not (mode == 'init' or 'id' in fields):
        _logger.error(
            "Import specification does not contain 'id' and we are not in init mode, cannot continue."
        )
        return

    uid = 1
    datas = []
    for line in reader:
        if (not line) or not reduce(lambda x, y: x or y, line):
            continue
        try:
            datas.append(map(lambda x: misc.ustr(x), line))
        except:
            _logger.error("Cannot import the line: %s", line)
    result, rows, warning_msg, dummy = pool.get(model).import_data(
        cr, uid, fields, datas, mode, module, noupdate, filename=fname_partial)
    if result < 0:
        # Report failed import and abort module install
        raise Exception(
            _('Module loading %s failed: file %s could not be processed:\n %s')
            % (module, fname, warning_msg))
    if config.get('import_partial'):
        data = pickle.load(file(config.get('import_partial')))
        data[fname_partial] = 0
        pickle.dump(data, file(config.get('import_partial'), 'wb'))
        cr.commit()
Example #40
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', type=int, action='store',
            dest='data_num', help='choose which data set to use')
    parser.add_argument('-m', type=int, action='store',
            dest='filter_method', help='choose which method to filter data')
    if len(sys.argv) != 5:
        print 'Command e.g.: python filterUserAndLocationByFreq.py -d 0(1,2) -m 0(1)'
        sys.exit(1)

    para = parser.parse_args()
    if para.data_num == 0:
        checkin_infile = settings["ROOT_PATH"] + settings["CHECKIN_PAIR_FILE1"]
        poi_infile = settings["ROOT_PATH"] + settings["SRC_DATA_FILE1_1"]
        checkin_outfile = settings["ROOT_PATH"] + settings["FILTER_CHECKIN_PATR_FILE1"]
        loc_latlng = loadPoiInfo(poi_infile, para.data_num)
    elif para.data_num == 1:
        checkin_infile = settings["ROOT_PATH"] + settings["CHECKIN_PAIR_FILE2"]
        poi_infile = settings["ROOT_PATH"] + settings["SRC_DATA_FILE2_1"]
        checkin_outfile = settings["ROOT_PATH"] + settings["FILTER_CHECKIN_PATR_FILE2"]
        loc_latlng = loadPoiInfo(poi_infile, para.data_num)
    elif para.data_num == 2:
        checkin_infile = settings["ROOT_PATH"] + settings["CHECKIN_PAIR_FILE3"]
        poi_infile = settings["ROOT_PATH"] + settings["SRC_DATA_FILE3_3"]
        checkin_outfile = settings["ROOT_PATH"] + settings["FILTER_CHECKIN_PAIR_FILE3"]
        loc_latlng = loadPoiInfo(poi_infile, para.data_num)
    else:
        print 'Invalid choice of data set'
        sys.exit(1)

    # Filtering
    uid_set = set([])
    pid_set = set([])
    if para.filter_method == 0:
        pid_uid = defaultdict(set)
        uid_pid = defaultdict(set)
        tag = False
        for line in csv.reader(open(checkin_infile)):
            if not tag:
                tag = True
                continue
            entry = map(int, line[:-1])
            uid, pid1, pid2 = entry[0], entry[1], entry[4]
            if pid1 in loc_latlng:
                pid_uid[pid1].add(uid)
                uid_pid[uid].add(pid1)
            if pid2 in loc_latlng:
                pid_uid[pid2].add(uid)
                uid_pid[uid].add(pid2)
        removed_pid = set([])
        removed_uid = set([])
        while True:
            removed_pid.clear()
            for pid in pid_uid:
                pid_uid[pid] = pid_uid[pid] - removed_uid
                if len(pid_uid[pid]) < settings["FILTER_LOCATION_VISIT_NUM"]:
                    removed_pid.add(pid)
            for pid in removed_pid:
                pid_uid.pop(pid)
            removed_uid.clear()
            for uid in uid_pid:
                uid_pid[uid] = uid_pid[uid]-removed_pid
                if len(uid_pid[uid]) < settings["FILTER_USER_VISIT_NUM"]:
                    removed_uid.add(uid)
            for uid in removed_uid:
                uid_pid.pop(uid)
            if len(removed_uid) == 0:
                uid_set = set(uid_pid.keys())
                pid_set = set(pid_uid.keys())
                uid_pid = None
                pid_uid = None
                break
    elif para.filter_method == 1:
        data = [entry for entry in csv.reader(open(checkin_infile))]
        data = [map(int, entry[:-1]) for entry in data[1:]]
        pid_record = defaultdict(set)
        uid_record = defaultdict(set)
        for i, entry in enumerate(data):
            uid, pid1, pid2 = entry[0], entry[1], entry[4]
            if pid1 in loc_latlng and pid2 in loc_latlng:
                uid_record[uid].add(i)
                pid_record[pid1].add(i)
                pid_record[pid2].add(i)
        removed_record = set([])
        removed_pid = set([])
        removed_uid = set([])
        while True:
            for idx in removed_record:
                uid, pid1, pid2 = data[idx][0], data[idx][1], data[idx][4]
                pid_record[pid1] = pid_record[pid1] - set([idx])
                pid_record[pid2] = pid_record[pid2] - set([idx])
            removed_record.clear()
            for pid in pid_record:
                if len(pid_record[pid]) < settings["FILTER_LOCATION_RECORD_NUM"]:
                    removed_pid.add(pid)
                    for idx in pid_record[pid]:
                        removed_record.add(idx)
            for pid in removed_pid:
                pid_record.pop(pid)
            removed_pid.clear()
            for uid in uid_record:
                if len(uid_record[uid]) < settings["FILTER_USER_RECORD_NUM"]:
                    removed_uid.add(uid)
                    for idx in uid_record[uid]:
                        removed_record.add(idx)
            for uid in removed_uid:
                uid_record.pop(uid)
            removed_uid.clear()
            print "Removed Record Number: %d" % len(removed_record)
            if len(removed_record) == 0:
                uid_set = set(uid_record.keys())
                pid_set = set(pid_record.keys())
                data = None
                uid_record = None
                pid_record = None
                break

    tag = False
    with open(checkin_outfile, "w") as wfp:
        writer = csv.writer(wfp, lineterminator="\n")
        for entry in csv.reader(open(checkin_infile)):
            if not tag:
                tag = True
                writer.writerow(entry)
            else:
                uid, pid1, pid2 = map(int, [entry[0], entry[1], entry[4]])
                if uid in uid_set and pid1 in pid_set and pid2 in pid_set:
                    writer.writerow(entry)
Example #41
0
import csv

csvpath = os.path.join("Resources", "budget_data.csv")

# parameters
total_months = 0
months_changes = []
changes = []
greatest_increase = ["", 0]  # date & number
greatest_decrease = ["", 99999999]  # date & number; 99999999 is a starting sentinel so any real change is smaller
total = 0

with open(csvpath, newline='') as csvfile:
    csvreader = csv.reader(csvfile, delimiter=',')

    header = next(csvreader)
    first_row = next(csvreader)

    # ----------------------------------------------------------------
    total_months = total_months + 1
    total = total + int(first_row[1])
    previous_month = int(first_row[1])

    for row in csvreader:
        total_months = total_months + 1
        total = total + int(
            row[1])  # not called first_row because it's already "used"
        change = int(row[1]) - previous_month  # current_month - previous_month
        previous_month = int(row[1])
Example #42
0
# --------------------------------------------------

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--input",
                        "-i",
                        type=str,
                        default="colors.csv",
                        help="Input file (CSV)")
    parser.add_argument("--output",
                        "-o",
                        type=str,
                        default="colors.py",
                        help="Output file (Python)")
    parser.add_argument("--download",
                        "-d",
                        action="store_true",
                        help="Download from Wikipedia")
    args = parser.parse_args()

    if args.download:
        download_colors(args.input)

    with open(args.input, "r") as input_file:
        with open(args.output, "w") as output_file:
            for row in csv.reader(input_file):
                name = row[0].upper()
                rgb = (int(row[-3]), int(row[-2]), int(row[-1]))
                output_file.write("{0} = {1}\n".format(name, rgb))
Example #43
0
import numpy as np
import matplotlib.pyplot as plt
import csv
from sklearn import model_selection
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report

# In[2]:

X = []  # an element of X is represented as (filename,text)
Y = []  # an element of Y represents the newsgroup category of the corresponding X element

with open('Dataset_preprocessed.csv', "r", encoding="utf8") as f:
    reader = csv.reader(f)
    included_cols = [5, 13]
    # included_cols1 = [5]

    for row in reader:
        content = list(row[i] for i in included_cols)
        # content1 = list(row[i] for i in included_cols1)
        # print(content[0])
        # print(content1)
        X.append(content[1])
        Y.append(content[0])
        # print(X)
        # print(Y)

# In[3]:
Example #44
0
# -*- coding: utf-8 -*-

import csv
import json

f = open("data/CUIs.csv", 'r',encoding="utf-8" )
reader = csv.reader(f,delimiter=',')
cuis_lang=list(reader)
f.close()


rows=[]
for row in cuis_lang:
    index=dict()
    index["_source"]={'cui': row[0] , 'label': row[1] , 'lang': row[2]}
    rows.append(index)
    
    
with open("data/umlsDump.json", "w", encoding="utf-8") as output:
    for row in rows:
        output.write(json.dumps(row))
        output.write('\n')

    


Example #45
0
try:
    page = urllib.request.urlopen(webRequest)
    content = page.read()
    output = open(localFilePath, "wb")
    output.write(content)
    output.close()
except urllib.request.HTTPError as err:
    print(err.fp.read())

if os.path.exists(localFilePath):
    print("file exists")
    lineNum = 0
    listOfLists = []

    with open(localFilePath, "r") as csvFile:
        lineReader = csv.reader(csvFile)
        for row in lineReader:
            price = int(row[1])
            date = row[2]
            postcode = row[3]
            oneResultRow = [price, date, postcode]
            listOfLists.append(oneResultRow)
    print("done with file")

    listOfListsSorted = sorted(listOfLists, key=lambda x:x[0], reverse=True)
    print(listOfListsSorted)
else:
    print("no file")


Example #46
0
def _extend(filename, n, keys=()):
    """
    For internal use only. Extend a file.

    :param filename: str
    :param n: int
    :param keys: tuple
    :return: str, set
    """

    with open(filename, 'r') as file:
        header = file.readline()
        reader = csv.reader(file)
        lines = [_ for _ in reader]

    fname = f"{filename}_{n}.csv"
    with open(fname, 'w') as file:
        file.write(header)
        for line in lines:
            file.write(','.join(line) + '\n')
        # file.writelines([','.join(x) for x in lines])
        # file.write('\n')

        if not keys:
            these_keys = set([line[0].strip() for line in lines])
        else:
            these_keys = set()
            n = n // 5

        for i in range(n):
            for line in lines:
                mod_words = line[:]

                if keys:  # Use provided users and products
                    uid = random.choice(keys[0])
                    pid = random.choice(keys[1])

                    counter = 0
                    while (uid, pid) in these_keys:
                        uid = random.choice(keys[0])
                        pid = random.choice(keys[1])
                        counter += 1
                        if counter > 100:
                            break

                    if (uid, pid) in these_keys:
                        continue

                    file.write(f"{uid}, {pid}, {random.randint(1, int(mod_words[-1].strip()) * 2)}\n")
                else:
                    mod_key = ''.join([random.choice(string.ascii_letters) for _ in range(len(mod_words[0]))])
                    while mod_key.strip() in these_keys:
                        mod_key = ''.join([random.choice(string.ascii_letters) for _ in range(len(mod_words[0]))])
                    these_keys.add(mod_key)
                    mod_words[0] = mod_key

                    for j, word in enumerate(line[1:], 1):
                        # If a phone number, randomize digits
                        if re.match(r"\d{3}-\d{3}-\d{4}", word.strip()):
                            num = f"{random.randint(0, 9999999999):09d}"
                            mod_words[j] = num[:3] + '-' + num[3:6] + '-' + num[-4:]
                        # If a number, randomize
                        elif re.fullmatch(r"\d*", word.strip()):
                            num = random.randint(1, int(word.strip()) * 2)
                            mod_words[j] = str(num)
                        else:  # Replace 1/2 of characters with random digits
                            mod_locs = [random.randint(0, len(word) - 1) for _ in range(len(word) // 2)]
                            lst = list(word)
                            for loc in mod_locs:
                                lst[loc] = random.choice(string.ascii_letters)
                            mod_words[j] = ''.join(lst)

                    file.write(','.join(mod_words) + '\n')
            # file.writelines([]) for line in lines])

    return fname, these_keys
Example #47
0
def clock_in():
    """
    Student selects a subject to give attendance for, then using cascade classifier and trainer.yml, the student is verified
    """
    notifier.configure(
        text='CONSOLE: Analysing and Rendering Facial Features..')
    start = time.time()
    period = 10

    face_cascade = cv2.CascadeClassifier(
        'assets/haarcascade_frontalface_default.xml')
    """
    Taking input from drop down menu
    """
    subjectchoice = (clicked.get())

    cap = cv2.VideoCapture(0)

    recognizer = cv2.face.LBPHFaceRecognizer_create()
    recognizer.read('trainer/trainer.yml')

    i = 0
    j = 0

    font = cv2.FONT_HERSHEY_SIMPLEX

    col_names = ['Id', 'Name',  'Gender',  'Age',
                 'Phone',  'Address',  'Subject',  'Date', 'Time']

    df = pd.read_csv("student_details/student_details.csv")

    while True:
        ret, frame = cap.read()

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        faces = face_cascade.detectMultiScale(gray, 1.3, 7)

        for (x, y, w, h) in faces:

            # roi is the region of interest, it slices the gray array. It selected row starting with y till y+h and column starting with x till x+w
            roi_gray = gray[y:y + h, x:x + w]
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

            serial, confidence = recognizer.predict(roi_gray)

            if (confidence < 40):
                ts = time.time()
                date = datetime.datetime.fromtimestamp(ts).strftime('%d-%m-%Y')
                timeStamp = datetime.datetime.fromtimestamp(
                    ts).strftime('%H:%M:%S')
                fetchedname = df.loc[df['SERIAL NO.'] == serial]['NAME'].values
                fetchedgender = df.loc[df['SERIAL NO.']
                                       == serial]['GENDER'].values
                fetchedage = df.loc[df['SERIAL NO.'] == serial]['AGE'].values
                fetchedphonenumber = df.loc[df['SERIAL NO.']
                                            == serial]['PHONE NUMBER'].values
                fetchedaddress = df.loc[df['SERIAL NO.']
                                        == serial]['ADDRESS'].values
                ID = df.loc[df['SERIAL NO.'] == serial]['ID'].values
                ID = str(ID)
                ID = ID[1:-1]
                name2 = str(fetchedname)
                name2 = name2[2:-2]
                gender2 = str(fetchedgender)
                gender2 = gender2[2:-2]
                age2 = str(fetchedage)
                age2 = age2[1:-1]
                phonenumber2 = str(fetchedphonenumber)
                phonenumber2 = phonenumber2[1:-1]
                address2 = str(fetchedaddress)
                address2 = address2[2:-2]

                attendance = [str(ID),  name2,  gender2,  age2,  phonenumber2,  address2,  subjectchoice,  str(date),
                              str(timeStamp)]

            else:
                Id = '\n Unable To Recognize This Entity! \n'
                name2 = str(Id)

            cv2.putText(frame, "Name : " + str(name2) + " Confidence (Lower the Better):  " +
                        str(int(confidence)), (x, y - 10), font, 1, (120, 255, 120), 4)
        cv2.imshow('frame', frame)

        if time.time() > start + period:
            break

        if cv2.waitKey(100) & 0xFF == ord('q'):
            break

    ts = time.time()
    date = datetime.datetime.fromtimestamp(ts).strftime('%d-%m-%Y')

    """
    Writing to daily_generated_attendance_csv + date + .csv to display in GUI and also as a CSV
    """
    exists = os.path.isfile(
        "daily_generated_attendance_csv/daily_generated_attendance_csv_" + date + ".csv")

    if exists:
        with open("daily_generated_attendance_csv/daily_generated_attendance_csv_" + date + ".csv", 'a+') as csvFile1:
            writer = csv.writer(csvFile1)
            writer.writerow(attendance)
        csvFile1.close()
    else:
        with open("daily_generated_attendance_csv/daily_generated_attendance_csv_" + date + ".csv", 'a+') as csvFile1:
            writer = csv.writer(csvFile1)
            writer.writerow(col_names)
            writer.writerow(attendance)
        csvFile1.close()

    with open("daily_generated_attendance_csv/daily_generated_attendance_csv_" + date + ".csv", 'r') as csvFile1:
        reader1 = csv.reader(csvFile1)
        for lines in reader1:
            i = i + 1
            if (i > 1):
                if (i % 2 != 0):
                    iidd = str(lines[0]) + '   '

    csvFile1.close()
    """
    Inserting Details to SQL Database i.e Writing attendance[] and drop down menu choice to sql
    """
    subid = 'PyAB'
    teacherid = '1'
    if(subjectchoice == 'Python'):
        subid = 'PyAB'
        teacherid = '1'
    elif(subjectchoice == 'DBMS'):
        subid = 'DbKM'
        teacherid = '2'
    elif(subjectchoice == 'TCS'):
        subid = 'TcSY'
        teacherid = '3'
    elif(subjectchoice == 'OS'):
        subid = 'OsMR'
        teacherid = '4'

    myId = str(ID)
    myId = myId[1:-1]

    cursor.execute('INSERT INTO attends(studid, subid, attdate, atttime) VALUES(%s,%s,%s,%s)',
                   (myId, subid, str(date), str(timeStamp)))

    conn.commit()

    print("Attendance inserted")

    cap.release()
    cv2.destroyAllWindows()
    notifier.configure(
        text='CONSOLE: Thank You! Please Check the Attendance Sheet..')
    csv_updater()
Example #48
0
import csv
import numpy as np

with open('sample.csv', newline='', errors='ignore') as csvfile:
    rows = csv.reader(csvfile)
    for row in rows:
        a = row[0]
        b = row[1]
        a = '%03d' % int(a)
        txt_path = './test/txt/p225/' + 'p225_' + a + '.txt'
        print(txt_path)
        f = open(txt_path, 'w')
        f.write(b)
        f.close()
Example #49
0
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 28 14:38:35 2020

@author: Slayer20
"""

import csv

exp=[]
imp=[]
with open("C:\Users\Slayer20\Downloads\Course Resources\synergy_logistics_database.csv", "r") as archivo_csv:
    lector = csv.reader(archivo_csv)
    
    for linea in lector:
        exp.append(linea[2])
        imp.append(linea[3])

imp.pop(0)
exp.pop(0)        
agrup = list(zip(exp, imp))

i = 0
cont = []
com = []
while i < len(agrup):
    o = agrup[i]
    x = agrup.count(o)
    com.append(o)
    cont.append(x)
    i = i + 1
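
As a side note, the pair counting above can be written more compactly with collections.Counter; a minimal sketch over the same exp/imp lists (it counts each distinct pair once rather than once per occurrence):

from collections import Counter

# Count how many times each (exporter, importer) pair appears.
pair_counts = Counter(zip(exp, imp))
com = list(pair_counts.keys())
cont = list(pair_counts.values())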
Example #50
0
def capture_img():
    """
    This function takes images of the student and stores them in a 'dataset/' directory
    """

    notifier.configure(text='CONSOLE: Capturing Images.. Creating A Dataset..')
    # Firstly make sure all the directories are present
    path_existence("dataset/")
    path_existence("daily_generated_attendance_csv/")
    path_existence("student_details/")
    path_existence("trainer/")
    path_existence("trainer/trainer.yml")

    columns = ['SERIAL NO.',  'ID',  'NAME',
               'GENDER',  'AGE',  'PHONE NUMBER',  'ADDRESS']
    serial = 0

    exists = os.path.isfile("student_details/student_details.csv")

    if exists:
        with open("student_details/student_details.csv", 'r') as csvFile1:
            reader1 = csv.reader(csvFile1)
            for l in reader1:
                serial = serial + 1
        csvFile1.close()
    else:
        with open("student_details/student_details.csv", 'a+') as csvFile1:
            writer = csv.writer(csvFile1)
            writer.writerow(columns)
            serial = 1
        csvFile1.close()

    Id = (txtfield1.get())
    name = (txtfield2.get())
    gender = (txtfield3.get())
    age = (txtfield4.get())
    phonenumber = (txtfield5.get())
    address = (txtfield6.get())
    """
    Checking if user with same id exists, if yes then update
    """
    lst = [0]
    idchecker = False

    cursor.execute('select * from student_details')
    for x in cursor.fetchall():
        lst.append(x[0])

    for i in lst:
        if Id == i:
            idchecker = True
            break
        else:
            pass

    if idchecker == True:
        """
        User exists, hence needs to be updated
        """
        notifier.configure(
            text='CONSOLE: User already exists, Please Update Manually In The View Database section..')
    else:
        """
        User doesn't exist, hence needs to be added
        """
        cap = cv2.VideoCapture(0)

        face_cascade = cv2.CascadeClassifier(
            'assets/haarcascade_frontalface_default.xml')

        count = 0

        while (True):

            ret, frame = cap.read()

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            face_rects = face_cascade.detectMultiScale(
                gray, 1.3, 5)  # img, scaleFactor=1.1, minNeighbors=5,

            for (x, y, w, h) in face_rects:

                # Note: Unlike matplotlib.pyplot, cv2 takes images as BGR instead of RGB
                cv2.rectangle(frame, (x, y), (x + w, y + h),
                              (225, 206, 128), 2)

                count += 1

                cv2.imwrite("dataset/ " + name + "." + str(serial) + "." +
                            Id + '.' + str(count) + ".jpg", gray[y:y + h, x:x + w])

                cv2.imshow('Learning Your Face', frame)

            if cv2.waitKey(100) & 0xFF == ord('q'):
                break

            elif count >= 100:
                notifier.configure(
                    text='CONSOLE: Images Have Been Captured Successfully..')
                break

        cap.release()

        cv2.destroyAllWindows()
        """
        Saving Details to student_details.csv
        """
        row = [serial,  Id,  name,  gender,
               age,  phonenumber,  address]
        with open('student_details/student_details.csv', 'a+') as csvFile:
            writer = csv.writer(csvFile)
            writer.writerow(row)
        csvFile.close()
        """
        Inserting Details to SQL Database
        """
        cursor.execute('INSERT INTO student_details(studid, stdname, gender, age, phoneno, address) VALUES(%s,%s,%s,%s,%s,%s)',
                       (Id, name, gender, age, phonenumber, address))
        conn.commit()
        print("Record inserted")
Example #51
0
        easy_install_ep = [k for k in console
                if re.match(r'easy_install(-\d\.\d)?$', k)]
        for k in easy_install_ep:
            del console[k]

    # Generate the console and GUI entry points specified in the wheel
    if len(console) > 0:
        generated.extend(maker.make_multiple(['%s = %s' % kv for kv in console.items()]))
    if len(gui) > 0:
        generated.extend(maker.make_multiple(['%s = %s' % kv for kv in gui.items()], {'gui': True}))

    record = os.path.join(info_dir[0], 'RECORD')
    temp_record = os.path.join(info_dir[0], 'RECORD.pip')
    with open_for_csv(record, 'r') as record_in:
        with open_for_csv(temp_record, 'w+') as record_out:
            reader = csv.reader(record_in)
            writer = csv.writer(record_out)
            for row in reader:
                row[0] = installed.pop(row[0], row[0])
                if row[0] in changed:
                    row[1], row[2] = rehash(row[0])
                writer.writerow(row)
            for f in generated:
                h, l = rehash(f)
                writer.writerow((f, h, l))
            for f in installed:
                writer.writerow((installed[f], '', ''))
    shutil.move(temp_record, record)

def _unique(fn):
    @functools.wraps(fn)
Example #52
0
import csv
import os
import logging
import yaml

pat_dict = {}
visit_dict = {}
input_file = os.path.join(os.path.dirname(__file__), "..", "config",
                          "identifiers.csv")
output_file = os.path.join(os.path.dirname(__file__), "..", "config",
                           "identifiers.yml")
with open(input_file, newline="") as f:
    csvreader = csv.reader(f, delimiter=",", quotechar="\"")
    for i in range(5):
        next(csvreader)

    rows = map(lambda x: x[1:], csvreader)

    rowst = list(map(list, zip(*rows)))

    for row in rowst:
        pat, visit, *row1 = list(
            map(
                lambda x: x.replace(" ", "").replace(u"\xa0", "").replace(
                    "=", ":"), row))
        row2 = map(lambda x: x.upper(), filter(lambda x: ":" in x, row1))
        ids = list(row2)
        if pat != "N/A" and pat != "":
            pat_dict[pat] = ids
        if visit != "N/A" and visit != "":
            visit_dict[visit] = ids
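
The example imports yaml and defines output_file but is cut off before anything is written; a plausible ending is sketched below (the top-level key names are assumptions, not from the source):

# Hedged sketch: dump both lookup tables to the YAML file defined above.
with open(output_file, "w") as out:
    yaml.safe_dump({"patients": pat_dict, "visits": visit_dict},
                   out, default_flow_style=False)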
Example #53
0
data_folder_normalized = "users/data_normalized/"
'''

features=np.concatenate(([np.array(hardCodedPressure)[:,1]],[np.array(accMomVariance)[:,1]],[np.array(accBabyVariance)[:,1]],[np.array(preMomVariance)[:,1]],[np.array(preBabyVariance)[:,1]],[np.array(accCorrelation)[:,1]], \
[np.array(accDifference)[:,1]],[np.array(preDifference)[:,1]],[np.array(selfAccDiffMom)[:,1]],[np.array(selfAccDiffBaby)[:,1]],[np.array(selfPreDiffMom)[:,1]], \
[np.array(selfPreDiffBaby)[:,1]],[np.array(preCorrelation)[:,1]],[np.array(filteredPreMom)[:,1]],[np.array(filteredPreBaby)[:,1]]),axis=0)
'''

if __name__ == '__main__':

    files = os.listdir(data_folder)

    for f in files:
        _data = []
        file = open(data_folder + str(f), "r")
        reader = csv.reader(file)

        for row in reader:
            _data.append(map(float, row))

        file.close()

        _data = np.array(_data)

        for i in range(15):
            if i != 0 and i != 5 and i != 12:
                #print _data[:, i]
                if np.max(_data[:, i]) - np.min(_data[:, i]) == 0:
                    _data[:, i] = 0.5
                else:
                    _data[:, i] = (_data[:, i] - np.min(_data[:, i])) / (
Example #54
0
 def get_reader(self, csv_data, **reader_kwargs):
     reader = csv.reader(StringIO(csv_data), **reader_kwargs)
     yield reader
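
A hedged usage sketch; the class that owns get_reader is not shown in the excerpt, so the wrapper class below is hypothetical:

from io import StringIO
import csv

class CsvTestHelper:  # hypothetical owner; get_reader is the method shown above
    def get_reader(self, csv_data, **reader_kwargs):
        reader = csv.reader(StringIO(csv_data), **reader_kwargs)
        yield reader

helper = CsvTestHelper()
for reader in helper.get_reader("a,b\n1,2\n", delimiter=","):
    rows = list(reader)  # [['a', 'b'], ['1', '2']]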
Example #55
0
# Split the bam file by cluster ID.
# Credited to https://divingintogeneticsandgenomics.rbind.io/post/split-a-10xscatac-bam-file-by-cluster/

import os
import sys
import pysam
import csv

cluster_file = sys.argv[1]
bam_file = sys.argv[2]
output_location = sys.argv[3]
output_prefix = sys.argv[4]

cluster_dict = {}
with open(cluster_file) as csv_file:
    csv_reader = csv.reader(csv_file, delimiter='\t')
    # skip header
    header = next(csv_reader)
    for row in csv_reader:
        cluster_dict[row[0]] = row[1]

clusters = set(x for x in cluster_dict.values())

fin = pysam.AlignmentFile(bam_file, "rb")

# open the number of bam files as the same number of clusters, and map the out file handler to the cluster id,
# write to a bam with wb
fouts_dict = {}
for cluster in clusters:
    output_filename = os.path.join(output_location,
                                   "{}_{}.bam".format(output_prefix, cluster))
Example #56
0
import csv

import models, datasets

labelfile = "../resources/labels.csv"
testfile  = "../resources/test.csv"
# testfile  = "../resources/train.csv"

with open(labelfile, newline='') as csvfile:
    data = list(csv.reader(csvfile))

label_dic = {}

for s in data:
    label_dic[s[0]] = s[1]

# print (label_dic)

with open(testfile, newline='') as csvfile:
    data = list(csv.reader(csvfile))

total   = 0
correct = 0

for s in data:
    right_label = label_dic[s[0]]
    message     = s[1]

    intent, probability = models.classify_intent(
        models.get_classifier(),
        models.get_vectorizer(),
Example #57
0
                        help='notifications file')
    parser.add_argument('-c', '--codes', type=argparse.FileType('r'),
                        help='Optional file with codes. If present, the '
                             'file will be populated with codes. '
                             'No codes will be sent')
    parser.add_argument('--config', type=str, dest='config_file',
                        default='config.ini',
                        help='config file (default config.ini)')
    args = parser.parse_args()

    # Read configuration
    if not os.path.isfile(args.config_file):
        print(f'Config file {args.config_file} is missing. Aborting')
        exit(1)
    with open(args.config_file) as fp:
        config = configparser.ConfigParser()
        config.read_file(fp)

    # Read data
    reader = csv.DictReader(args.notif_file)
    data = list(reader)

    codes = None
    send = True
    if args.codes:
        codes = [code_line[0] for code_line in csv.reader(args.codes)]
        send = False

    main(data=data, codes=codes, notif_file=args.notif_file,
         config=config, send=send)
Example #58
0
def preprocess(filename):
    with open(filename, 'r') as f:
        dataset = [row for row in csv.reader(f.read().splitlines())]
    return dataset
Example #59
0
            'Retailers List/Business Data/processed_data/WA'
FDA_dir = 'C:/Users/lpatterson/AnacondaProjects/Tribal_Master'
fda_df = pd.read_excel(FDA_dir + '/input/Public retail data_original.xlsx')
fda_df = fda_df.loc[fda_df['State'] == 'WA', :]
fda_df.reset_index(inplace=True)

# load WA tab-delimited data sets
os.chdir(WA_rawdir)
rawfiles = []
for file in glob.glob("*.txt"):
    rawfiles.append(file)

for k in rawfiles:
    # some inconsistencies with the tab delimited data, so we need to first process with csv
    file = open(k, 'rt', encoding="utf8")
    reader = csv.reader(file, delimiter='\t', quotechar=None)
    csv_list = []

    for m, l in enumerate(reader):
        if m % 100000 == 0:
            print(m)
        csv_list.append(l)
    # make dataframe with first row as column headers
    biz_df = pd.DataFrame(csv_list)
    biz_df.columns = biz_df.iloc[0]
    # assign column number as name if missing
    biz_df.columns = [
        'col' + str(i) if j is None else j
        for i, j in enumerate(biz_df.columns)
    ]
    biz_df = biz_df[1:]
Example #60
0
The Lib/csv.py library provides the functions:

1. csv.reader;
2. csv.writer.

Parameters needed for reading and writing CSV files with Lib/csv.py:

a. Delimiter - the character used to separate fields;
b. Quotechar - the character used for fields that contain special characters;
c. Quoting - controls when quotes are generated by the writer and recognized by the reader;
d. newline = ''
'''

# Writing to a CSV file
import csv
print('Library imported.')
with open('planilha.csv', 'w', newline='') as csvfile:
    spamwriter = csv.writer(csvfile,
                            delimiter=' ',
                            quotechar='|',
                            quoting=csv.QUOTE_MINIMAL)
    spamwriter.writerow(['Spam'] * 5 + ['Additional Text'])
    spamwriter.writerow(['line1', 'line2', 'line3'])

with open('planilha.csv', 'r', newline='') as arquivo:
    spamreader = csv.reader(arquivo, delimiter=' ', quotechar='|')
    print('Type of spamreader: ', type(spamreader))
    for linha in spamreader:
        print('Type of linha: ', type(linha))
        print(', '.join(linha))
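
One more hedged illustration of item (c) above: the quoting parameter controls what the writer quotes. With csv.QUOTE_ALL every field is quoted, while csv.QUOTE_MINIMAL (used above) only quotes fields that actually need it. The file names and values below are illustrative only:

import csv

with open('quote_all.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile, delimiter=' ', quotechar='|',
                        quoting=csv.QUOTE_ALL)
    writer.writerow(['Spam', 'has space'])  # written as: |Spam| |has space|

with open('quote_minimal.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile, delimiter=' ', quotechar='|',
                        quoting=csv.QUOTE_MINIMAL)
    writer.writerow(['Spam', 'has space'])  # written as: Spam |has space|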