def order_ads_limited(csv_file_path: str, column_name: str) -> str:
    """
    Sort a CSV file on one named column and report where the result was written.

    :param csv_file_path: path of the CSV file to sort
    :param column_name: name of the column to sort by
    :return: path of the sorted output file
    """
    # Resolve the column name to the index that csvsort takes as its sort key.
    sort_key_index: int = _get_column_id(csv_file_path, column_name)
    # The destination path is derived from the input path by io_utils.
    sorted_path: str = io_utils.get_output_file_path(csv_file_path)

    # NOTE(review): max_size presumably caps csvsort's in-memory chunk size
    # (MB per the csvsorter docs) -- confirm against the installed version.
    csvsorter.csvsort(
        csv_file_path,
        [sort_key_index],
        output_filename=sorted_path,
        max_size=2000,
    )
    return sorted_path
def _remove_if_exists(path):
    """Delete *path*; do nothing if it was never created."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


def _flush_duplicates(dup_lines, merge_func, fout):
    """Write one merged line for a group of rows that share the same indices."""
    # The last field of each row is the value; literal_eval parses numeric
    # text without executing arbitrary code from the file.
    vals = [literal_eval(fields[-1]) for fields in dup_lines]
    inds = dup_lines[0][:-1]
    print('{} {}'.format(' '.join(inds), merge_func(vals)), file=fout)


def merge_dups(tensor_name, num_modes, merge_func=sum):
    """
    Remove duplicate non-zeros from a tensor file.

    The file is sorted on its first ``num_modes`` columns so that rows with
    identical indices become adjacent, then each run of such rows is collapsed
    into a single row whose value is ``merge_func`` applied to the run's
    values. On success the original file is replaced by the merged one.

    :param tensor_name: path of the space-delimited tensor file; every row is
        a list of indices followed by one value in the last field
    :param num_modes: number of leading index columns to sort on
    :param merge_func: callable taking a list of values and returning the
        merged value (defaults to ``sum``)
    :raises Exception: any error from sorting, parsing or file I/O is
        re-raised after the scratch files are cleaned up; the original
        tensor file is left untouched in that case
    """
    sorted_f = tensor_name + '.sorted'
    # Keep the scratch file next to the tensor (where '.sorted' is already
    # written) so the final replace never has to cross a filesystem boundary.
    tmp_name = '{}.{}.tns'.format(tensor_name, uuid.uuid4().hex)

    try:
        # Sort the "CSV" tensor -- this library prints to stdout in old
        # versions, so suppress that.
        with open(os.devnull, 'w') as redirect:
            with redirect_stdout(redirect):
                csvsort(tensor_name, range(num_modes), column_types=int,
                        output_filename=sorted_f, max_size=800,
                        delimiter=' ', has_header=False)

        # Merge duplicate non-zeros.
        with open(tmp_name, 'w') as fout, open(sorted_f, 'r') as fin:
            dup_lines = []
            for line in fin:
                fields = line.split()
                if not fields:
                    # Blank line: nothing to merge.
                    continue
                # Indices do not match -- merge the previous duplicates.
                if dup_lines and fields[:-1] != dup_lines[0][:-1]:
                    _flush_duplicates(dup_lines, merge_func, fout)
                    dup_lines = []
                dup_lines.append(fields)
            # Final flush; skipped when the input had no rows at all.
            if dup_lines:
                _flush_duplicates(dup_lines, merge_func, fout)

        # Overwrite the original data. os.replace also overwrites an existing
        # destination on Windows, where os.rename would fail.
        os.replace(tmp_name, tensor_name)
    except BaseException:
        # On any failure the original data must not be overwritten: drop the
        # partial output and let the caller see the error.
        _remove_if_exists(tmp_name)
        raise
    finally:
        _remove_if_exists(sorted_f)
# Read the first line of the stock file and discard it.
cheader = file1.readline()
# Every row of the source file is tab-delimited.
file1_rows = csv.reader(file1, delimiter="\t")

# Intermediate CSV: the same rows with surrounding whitespace stripped from
# every field.
stockfn_in = "C:\\Users\\b1013\\Desktop\\Pythoncode2\\stock_tmp3.csv"
# The file must be closed (and therefore flushed) before csvsort reads it
# back below; otherwise the sort can see a truncated file.
with open(stockfn_in, "w", encoding="utf-8", newline="") as file_in:
    writer_in = csv.writer(file_in)
    for row in file1_rows:
        writer_in.writerow([field.strip() for field in row])

# NOTE(review): mid-script import; consider moving it to the top-of-file
# import block.
import csvsorter

wd = "C:\\Users\\b1013\\Desktop\\Pythoncode2"
os.chdir(wd)
stockfn_tmp3 = "C:\\Users\\b1013\\Desktop\\Pythoncode2\\stock_tmp3.csv"
stocksorted = "C:\\Users\\b1013\\Desktop\\Pythoncode2\\stocksorted.csv"
# Arguments: source file, sort columns, destination file, header flag.
# NOTE(review): the first line was discarded above and no header row is
# written to stock_tmp3.csv here, so has_header=True makes csvsort treat the
# first written row as a header -- confirm this is intended.
csvsorter.csvsort(stockfn_tmp3, [0, 2], output_filename=stocksorted, has_header=True)

# Import the overall market data.
marketfn = "C:\\Users\\b1013\\Desktop\\Pythoncode2\\bigmarket.txt"
bigmarket_in = "C:\\Users\\b1013\\Desktop\\Pythoncode2\\bigmarket_in.csv"
with open(marketfn, "r", encoding="cp950", newline="") as filebigmarket:
    # Read the first line and discard it.
    cheader1 = filebigmarket.readline()
    file_big_rows = csv.reader(filebigmarket, delimiter="\t")
    # Both handles are closed on exit so the cleaned file is complete on disk
    # for whatever reads it next.
    with open(bigmarket_in, "w", encoding="utf-8", newline="") as file_bigmarket_in:
        writer_big_in = csv.writer(file_bigmarket_in)
        # Strip surrounding whitespace from every field.
        for row in file_big_rows:
            writer_big_in.writerow([field.strip() for field in row])

bigmarket = "C:\\Users\\b1013\\Desktop\\Pythoncode2\\bigmarket_in.csv"
# Write the cleaned stock file.
stockfn_tmp1 = "C:\\Users\\Master\\Desktop\\大學 課程\\python 商管程式設計 台大MOOC (二)\\上課練習檔案\\week4\\stock_c.csv"
try:
    # The context manager closes fh3 even if a row fails part-way through.
    with open(stockfn_tmp1, 'w', encoding='utf-8', newline='') as fh3:
        writer3 = csv.writer(fh3)
        # Write the header row verbatim (plain '\n' terminator, not csv's).
        fh3.write('COID,Name,MDATE,ROI,MV,CLOSE\n')
        # Strip surrounding whitespace from every field before writing.
        for arow in reader1:
            writer3.writerow([field.strip() for field in arow])
finally:
    # fh1 is the source handle opened earlier in the script; release it
    # whether or not the copy succeeded.
    fh1.close()

# Sort the file just written by stock (column 0) and date (column 2).
stockfn_sorted = "C:\\Users\\Master\\Desktop\\大學 課程\\python 商管程式設計 台大MOOC (二)\\上課練習檔案\\week4\\stock_sorted.csv"
csvsorter.csvsort(stockfn_tmp1, [0, 2], output_filename=stockfn_sorted, has_header=True)

# Load the market return data into a dict keyed by MDATE.
mktfn = "C:\\Users\\Master\\Desktop\\大學 課程\\python 商管程式設計 台大MOOC (二)\\上課練習檔案\\week4\\market.csv"
with open(mktfn, 'r', newline='') as fh1:
    reader1 = csv.DictReader(fh1)
    mktret = {arow['MDATE']: float(arow['MKT']) for arow in reader1}
print("read %d market return data" % len(mktret))

# Merge the sorted stock data with the market returns.
coidlist = []
namelist = []
alphalist = []