Beispiel #1
0
 def read_from_dir(cls, path, outexcel=False, outpath=None):
     """List the entries of ``path`` and either return them or dump them to a workbook.

     When ``outexcel`` is false the directory listing is returned as-is.
     Otherwise the listing is appended to sheet ``"sheet1"`` of a new
     workbook opened on ``outpath`` (each entry name fills both cells of
     its row) and the method returns ``None``.
     """
     entries = os.listdir(path)
     if not outexcel:
         return entries

     # Export branch: one two-cell row per directory entry.
     workbook = Excel(outpath)
     sheet_rows = [[name, name] for name in entries]
     workbook.new().append(sheet_rows, "sheet1")
     workbook.close()
Beispiel #2
0
    def __init__(self, file_name=None):
        """Load an administrative-code workbook and index it both ways.

        Args:
            file_name: Path handed to ``Excel``. The first row returned by
                ``read()`` is skipped (presumably a header row) and every
                remaining row is read as ``[code, region_name, ...]``.

        Sets ``region_list`` (region names in file order),
        ``admin_code_dict`` (region name -> code string) and
        ``code_admin_dict`` (code string -> region name), then calls
        ``_create_relationship()``.
        """
        mexcel = Excel(file_name)
        self._raw_acode_data = mexcel.read()

        # Normalise every data row once: drop all whitespace from the region
        # name and render the code as an integer string (110000.0 -> "110000").
        # The pattern is a raw string so that ``\s`` is not parsed as an
        # (invalid) string escape sequence.
        pairs = [
            (re.sub(r'\s+', '', item[1]), str(int(item[0])))
            for item in self._raw_acode_data[1:]
        ]

        self.region_list = [region for region, _ in pairs]
        self.admin_code_dict = OrderedDict(pairs)
        self.code_admin_dict = OrderedDict((code, region) for region, code in pairs)

        self._create_relationship()
# coding=UTF-8

import pandas as pd
import numpy as np
from libs.database.class_cgssdatabase import CgssDatabase
from libs.file.class_Excel import Excel

# Hard-coded local input workbook and working directory.
filename = r"D:\data\student.xls"
Path = r"D:\data\labdata"

# Read the workbook and keep only the second cell of every row.
mexcel = Excel(filename)
mdata = [row[1] for row in mexcel.read()]

# Request the listed variables from the 2013 database; each entry is
# passed as a [name, False] pair.
variable_names = (
    "a2",
    "a3a",
    "a4",
    "a7a",
    "a8a",
    "a8b",
    "a10",
    "a18",
    "a59j",
    "a69",
    "a89b",
)
cgdb = CgssDatabase(year=2013)
pdata = cgdb.variables(variables=[[name, False] for name in variable_names])
pdata.columns = [
    "A10政治面貌",
# coding=UTF-8

import numpy as np
import pandas as pd
from libs.file.class_Excel import Excel

# 0. Import the student roster
STUDENT_FILE = 'E:\\temp\\lab\\student.xls'
mexcel = Excel(STUDENT_FILE)
student_list = mexcel.read()
# Keep only the second cell of every row (presumably the student name or id -- TODO confirm).
students = [item[1] for item in student_list]

# 1. Generate the per-student data and answers
out_path = 'E:\\temp\\lab\\'
file_name = '_data.xls'
rules1 = [1,2,4]
rules3 = [3,7,9]
# Header row: a 'student' column followed by four columns per model
# (the model name plus three empty cells) for model1..model4.
result = [['student','model1','','','','model2','','','','model3','','','','model4','','','']]
for student in students:
    # Noise term: 1000 normal draws with mean 0 and a standard deviation of
    # (random integer in [1, 20)) / 10.
    mu, sigma = 0, np.random.randint(1,20)/10
    miu = np.random.normal(mu, sigma, 1000)

    # x1 and x3 are independent random integers (1000 draws each);
    # x2 is 5 plus a random-slope multiple of x1 plus small Gaussian noise (sd 0.1).
    x1 = np.random.randint(1,100,1000)
    x2 = 5 + np.random.randint(1,5)*x1/10 + np.random.normal(mu, 0.1, 1000)
    x3 = np.random.randint(1,200,1000)

    pdata = pd.DataFrame({'x1':x1,'x2':x2,'x3':x3})
    # Row being assembled for this student; starts with the student identifier.
    one_student_coefs = [student]
    # One pass per model (four in total): draw a constant and three coefficients.
    # NOTE(review): the remainder of this loop body is not visible in this excerpt.
    for i in range(0,4):
        constant = np.random.randint(1,100,1)
        coefs = np.random.randint(0,10,3)
            if re.match('^(-)?\d+((\.|.)\d+)?$',new_item) is not None:
                new_item = re.sub('.','.',new_item)
                new_row.append(float(new_item))
            else:
                all = False
                new_row.append(item)
        return new_row,all

if __name__ == '__main__':
    # Administrative-code file built from the 2003 code workbook.
    code_table = AdminCodeFile(r'E:\data\procedure\Process\reduction\data\admincode\2003.xls')

    # Read the source sheet through WinExcel and run the reduction on it.
    source_path = r'E:\data\procedure\Process\reduction\data\2003_prefecture\3_3_按三次业人员就业状况_地级市_2003.xls'
    raw_rows = WinExcel(source_path).read()
    reducer = CitydataReduction(raw_rows, code_table)
    reducer.reduction()

    reduced_rows = reducer.second_data
    print(reducer.second_data)

    # Append the reducer's second_data to sheet 'mysheet' of a new workbook.
    writer = Excel(r'd:\data\demo.xlsx')
    writer.new().append(reduced_rows, 'mysheet')
    writer.close()





    source_file = os.path.join(current_import_dir, file)
    # Target files
    file_name = re.split("\.", file)[0]
    new_file_name = "".join([file_name, "_first_step.xlsx"])
    new_delete_name = "".join([file_name, "_deleted.xlsx"])
    target_file = os.path.join(current_export_path, new_file_name)
    delete_file = os.path.join(current_export_path, new_delete_name)

    # Skip this file if the target file already exists
    if os.path.exists(target_file):
        print("Here it is! ", file)
        continue

    # Read the data from the Excel file and build the CitydataReduction object
    # mexcel = WinExcel(source_file)
    mexcel = Excel(source_file)
    sdata = mexcel.read()
    if abnormal:
        ndata = []
        first_part = []
        second_part = []
        for row in sdata:
            if set(row) == {None}:
                if len(first_part) < 1:
                    continue
                else:
                    ndata.extend(first_part)
                    ndata.extend(second_part)
                    first_part = []
                    second_part = []
            first_part.append(row[0:region_col])
result = []
journals = json.load(open(r'E:\gitrobot\files\publication\ssci_geography_json.txt'))
for journal in journals:
    if journal[1] not in impact_factor_journals:
        result.append([journal[0].upper(),journal[1],None])
    else:
        result.append([journal[0].upper(),journal[1],impact_factor_journals[journal[1]]])

# 2. output
for record in result:
    print(record)

outfile = r'd:\down\tmp_journal.xlsx'
moutexcel = Excel(outfile)
moutexcel.new().append(result, 'sheet1')
moutexcel.close()'''

# Open the Mongo connection (arguments are presumably database and
# collection names -- TODO confirm against MongoDB.connect).
mongo = MongoDB()
mongo.connect('publication','WesternJournal')

# Read sheet 4 of the journals workbook.
filename = r'd:\down\journals.xlsx'
mexcel = Excel(filename)
mdata = mexcel.read(sheet=4)

# One record per row after the first; an empty-string third cell is
# stored as IF=None, any other value is kept unchanged.
result = [
    {
        'journal': item[0],
        'SSIN': item[1],
        'IF': None if item[2] == '' else item[2],
    }
    for item in mdata[1:]
]

#for j in result:
#    mongo.collection.insert_one(j)