예제 #1
0
def order_ads_limited(csv_file_path: str, column_name: str) -> str:
    """
    Sort a CSV file by one named column and write the result to a new file.

    The column name is resolved to a column id with ``_get_column_id`` and the
    destination path is obtained from ``io_utils.get_output_file_path``; the
    sorting itself is delegated to ``csvsorter.csvsort``.

    :param csv_file_path: path of the CSV file to sort
    :param column_name: name of the column to sort by
    :return: path of the sorted output file
    """
    sort_column: int = _get_column_id(csv_file_path, column_name)
    sorted_path: str = io_utils.get_output_file_path(csv_file_path)

    # max_size is forwarded to csvsort unchanged; presumably its in-memory
    # limit (MB) before it falls back to on-disk merging -- confirm against
    # the csvsorter documentation.
    csvsorter.csvsort(
        csv_file_path,
        [sort_column],
        output_filename=sorted_path,
        max_size=2000,
    )
    return sorted_path
예제 #2
0
def _remove_quietly(path):
    """Delete *path*, tolerating the case where it was never created."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


def _flush_dup_group(rows, merge_func, fout):
    """Write one ``<indices> <merged value>`` line for rows sharing indices."""
    vals = [literal_eval(row[-1]) for row in rows]
    print('{} {}'.format(' '.join(rows[0][:-1]), merge_func(vals)), file=fout)


def merge_dups(tensor_name, num_modes, merge_func=sum):
    """Remove duplicate non-zeros from a tensor file, in place.

    The file is sorted on its first ``num_modes`` columns so that entries with
    identical indices become adjacent; each run of identical indices is then
    collapsed into a single line whose value is ``merge_func`` applied to the
    list of the run's values.

    :param tensor_name: path of the space-delimited tensor file; every line is
        a sequence of index fields followed by one value field. The file is
        overwritten with the merged result on success.
    :param num_modes: number of leading index columns to sort on.
    :param merge_func: callable taking the list of duplicate values and
        returning the merged value (defaults to ``sum``).
    :raises Exception: any error raised while sorting, merging or replacing
        the file is propagated after the temporary files have been removed;
        the original tensor file is left untouched in that case.
    """
    sorted_f = tensor_name + '.sorted'
    tmp_name = str(uuid.uuid4().hex) + '.tns'
    try:
        # Sort the "CSV" tensor -- this library prints to stdout in old
        # versions, so suppress that.
        with open(os.devnull, 'w') as redirect:
            with redirect_stdout(redirect):
                csvsort(tensor_name,
                        range(num_modes),
                        column_types=int,
                        output_filename=sorted_f,
                        max_size=800,
                        delimiter=' ',
                        has_header=False)

        # Merge duplicate non-zeros.
        with open(tmp_name, 'w') as fout, open(sorted_f, 'r') as fin:
            dup_lines = []
            for line in fin:
                fields = line.split()
                if not fields:
                    # blank line: carries neither indices nor a value
                    continue

                # indices do not match -- merge previous duplicates
                if dup_lines and fields[:-1] != dup_lines[0][:-1]:
                    _flush_dup_group(dup_lines, merge_func, fout)
                    dup_lines = []

                dup_lines.append(fields)

            # final flush (nothing to write when the input had no entries)
            if dup_lines:
                _flush_dup_group(dup_lines, merge_func, fout)

        # Overwrite the original data only after the merged file has been
        # closed, so every buffered line is on disk. os.replace also
        # overwrites an existing destination on Windows, unlike os.rename.
        os.replace(tmp_name, tensor_name)

    except BaseException:
        # If anything failed we must not overwrite the original data; drop
        # the partial output and let the caller see the error.
        _remove_quietly(tmp_name)
        raise

    finally:
        _remove_quietly(sorted_f)
예제 #3
0
cheader = file1.readline()  # read the first line and discard it
file1_rows = csv.reader(file1, delimiter="\t")  # remaining rows are tab-delimited
stockfn_in = "C:\\Users\\b1013\\Desktop\\Pythoncode2\\stock_tmp3.csv"

# Write every row with surrounding whitespace stripped from each field.
# The ``with`` block closes (and therefore flushes) the file before csvsort
# reads it back below; leaving it open could hand csvsort a truncated file.
with open(stockfn_in, "w", encoding="utf-8", newline="") as file_in:
    writer_in = csv.writer(file_in)
    for row in file1_rows:
        writer_in.writerow([field.strip() for field in row])

import csvsorter
wd = "C:\\Users\\b1013\\Desktop\\Pythoncode2"
os.chdir(wd)
stockfn_tmp3 = stockfn_in  # the file written just above
stocksorted = "C:\\Users\\b1013\\Desktop\\Pythoncode2\\stocksorted.csv"
# Arguments: source file, columns to sort on (0 then 2), destination file,
# whether the source has a header row.
# NOTE(review): has_header=True tells csvsort the first row is a header, but
# no header row is written above (the source's first line was discarded) --
# confirm the first remaining row really is a header.
csvsorter.csvsort(stockfn_tmp3, [0, 2], output_filename=stocksorted, has_header=True)

# Import the overall market (index) data.
marketfn = "C:\\Users\\b1013\\Desktop\\Pythoncode2\\bigmarket.txt"
bigmarket_in = "C:\\Users\\b1013\\Desktop\\Pythoncode2\\bigmarket_in.csv"

# Both files are managed by ``with`` so the cleaned output is flushed and
# closed before anything reads bigmarket_in.csv back. The input is opened
# first, so a missing source file still fails before the output is created.
with open(marketfn, "r", encoding="cp950", newline="") as filebigmarket, \
        open(bigmarket_in, "w", encoding="utf-8", newline="") as file_bigmarket_in:
    cheader1 = filebigmarket.readline()  # read the first line and discard it
    file_big_rows = csv.reader(filebigmarket, delimiter="\t")
    writer_big_in = csv.writer(file_bigmarket_in)

    # Strip surrounding whitespace from every field.
    for row in file_big_rows:
        writer_big_in.writerow([field.strip() for field in row])

bigmarket = bigmarket_in  # path of the cleaned market file written above
예제 #4
0
# Write the cleaned stock rows to stock_c.csv.
stockfn_tmp1 = "C:\\Users\\Master\\Desktop\\大學 課程\\python 商管程式設計 台大MOOC (二)\\上課練習檔案\\week4\\stock_c.csv"
try:
    # ``with`` guarantees the output is closed even if a row fails to write,
    # and that it is fully flushed before csvsort reads it below.
    with open(stockfn_tmp1, 'w', encoding='utf-8', newline='') as fh3:
        writer3 = csv.writer(fh3)
        fh3.write('COID,Name,MDATE,ROI,MV,CLOSE\n')  # write the header row
        for arow in reader1:
            # strip surrounding whitespace from every field
            writer3.writerow([field.strip() for field in arow])
finally:
    # fh1 is opened before this section; presumably the handle behind
    # reader1 -- verify against the earlier part of the script.
    fh1.close()

# Sort the file just written by stock and date (columns 0 and 2).
stockfn_sorted = "C:\\Users\\Master\\Desktop\\大學 課程\\python 商管程式設計 台大MOOC (二)\\上課練習檔案\\week4\\stock_sorted.csv"
csvsorter.csvsort(stockfn_tmp1, [0, 2],
                  output_filename=stockfn_sorted,
                  has_header=True)
# Load the market return data into a dictionary: MDATE -> MKT (as float).
mktfn = "C:\\Users\\Master\\Desktop\\大學 課程\\python 商管程式設計 台大MOOC (二)\\上課練習檔案\\week4\\market.csv"
mktret = dict()
# ``with`` closes the file even if a row is missing a column or holds a
# non-numeric MKT value.
# NOTE(review): no encoding is given, so the platform default is used --
# confirm that matches how market.csv was written.
with open(mktfn, 'r', newline='') as fh1:
    reader1 = csv.DictReader(fh1)
    for arow in reader1:
        mktret[arow['MDATE']] = float(arow['MKT'])
print("read %d market return data" % len(mktret))

# Merge the already-sorted stock file with the market returns.
# These lists start empty; presumably they are filled by the code that follows.
coidlist = []
namelist = []
alphalist = []