Example no. 1
def Ttest(name):
    cluster_dim = request.get_json()  # parsed JSON body of the request
    data = []
    Tnum = float(cluster_dim["Tnum"])
    # print(name)
    fileN = db.searchFile(name)
    # print(fileN)
    fileN = fileN[0]['filename']
    # print(fileN)
    if fileN == "dont_have_file":
        ret = {"route": "nofile"}
        return json.dumps(ret)
    with open(name + '/' + fileN, 'r', newline='') as csvFile:
        csv_file = csv.reader(csvFile)
        for content in csv_file:
            content = list(map(float, content))
            if len(content) != 0:
                data.append(content)
    result = stats.ttest_1samp(data, Tnum)
    print("result\n")
    print(result)
    with open(name + '/' + 'Ttest_' + fileN, 'w', newline='') as new_file:
        csv_writer = csv.writer(new_file)
        csv_writer.writerow(result[1])  # one p-value per tested column
        if (result[1] < 0.05).all():  # .all() avoids an ambiguous truth value when the CSV has several columns
            csv_writer.writerow(["The difference is significant"])
        else:
            csv_writer.writerow(["The difference is not significant"])

    ret = {"route": 'Ttest_' + fileN}
    return json.dumps(ret)
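
The db.searchFile helper and the Flask request object belong to the surrounding application and are not shown here. As a minimal standalone sketch of the same one-sample t-test, assuming a single column of float samples and a hypothetical population mean of 5.0:

# Minimal sketch of the one-sample t-test used above (hypothetical data).
from scipy import stats

samples = [5.1, 4.9, 5.3, 5.0, 5.2]
statistic, pvalue = stats.ttest_1samp(samples, 5.0)
print(pvalue, "significant" if pvalue < 0.05 else "not significant")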
Example no. 2
def pdf2word(name):
    # pip install pdfminer3k
    # pip install python-docx
    fileN = db.searchFile(name)
    # print(fileN)
    fileN = fileN[0]['filename']
    # print(fileN)
    if fileN == "dont_have_file":
        ret = {"route": "nofile"}
        return json.dumps(ret)
    pdfname = fileN
    document = Document()
    fn = open(name + '/' + pdfname, 'rb')
    parser = PDFParser(fn)
    doc = PDFDocument()
    parser.set_document(doc)
    doc.set_parser(parser)
    resource = PDFResourceManager()
    laparams = LAParams()
    device = PDFPageAggregator(resource, laparams=laparams)
    interpreter = PDFPageInterpreter(resource, device)
    for i in doc.get_pages():
        interpreter.process_page(i)
        layout = device.get_result()
        for out in layout:
            if hasattr(out, "get_text"):
                content = out.get_text().replace(u'\n', u'')
                document.add_paragraph(content, style='ListBullet')
    # save once after all pages have been processed
    document.save(name + '/' + pdfname[0:-4] + '.docx')
    print('Processing complete')
    ret = {"route": pdfname[0:-4] + '.docx'}
    return json.dumps(ret)
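
pdfminer3k exposes the low-level parser objects used above. As an alternative sketch, not the dependency this code uses, the maintained pdfminer.six fork offers a high-level helper that shortens the same extraction:

# Hedged sketch: the same PDF-to-docx conversion with pdfminer.six's
# high-level extract_text instead of the low-level pdfminer3k objects.
from pdfminer.high_level import extract_text  # pip install pdfminer.six
from docx import Document  # pip install python-docx

def pdf2word_simple(pdf_path, docx_path):
    text = extract_text(pdf_path)  # whole document as one string
    document = Document()
    for line in text.splitlines():
        if line.strip():
            document.add_paragraph(line)
    document.save(docx_path)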
Example no. 3
def nb(name):
    print("first")
    fileN = db.searchFile(name)
    print("second")
    print(fileN)
    isfile = fileN[0]['filename']
    # print(fileN)
    if isfile == "dont_have_file":
        ret = {"route": "nofile"}
        return json.dumps(ret)
    if len(fileN) < 2:
        ret = {"route": 'A training set and a test set must be uploaded for model training and testing'}
        return json.dumps(ret)
    train = []
    test = []
    train_file = fileN[0]['filename']
    test_file = fileN[1]['filename']
    print(train_file)
    print(test_file)
    train = get_nb_train_dataset(name, train_file)
    test = get_nb_test_dataset(name, test_file)
    print('train=', train)
    print('test=', test)
    train_data, train_target, valid_data, valid_target = nb_train_and_valid_data(
        train)
    nbc_6 = Pipeline([
        ('vect', TfidfVectorizer()),
        ('clf', MultinomialNB(alpha=1.0)),
    ])
    nbc_6.fit(train_data, train_target)  # train the multinomial Naive Bayes classifier
    valid = nbc_6.predict(valid_data)  # predict on the validation split
    print(valid)
    count = 0  # count the number of correct predictions
    for left, right in zip(valid, valid_target):
        if left == right:
            count += 1
    print(count / len(valid_target))
    predict = nbc_6.predict(test)
    print(predict)
    p_list = []
    for p in predict:
        list_tem = []
        list_tem.append(p)
        p_list.append(list_tem)
    print(p_list)
    with open(name + '/' + 'NaiveBayes.csv', 'w', newline='') as new_file:
        csv_writer = csv.writer(new_file)
        for line in p_list:
            # print(line)
            csv_writer.writerow(list(line))
    ret = {"route": 'NaiveBayes.csv'}
    return json.dumps(ret)
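
get_nb_train_dataset, get_nb_test_dataset and nb_train_and_valid_data are defined elsewhere in the repository. A hypothetical sketch of what they might look like, assuming training rows of the form "text,label", test rows containing only text, and the last 20% of the training rows held out for validation:

# Hypothetical dataset helpers (the real repository versions may differ).
import csv

def get_nb_train_dataset(name, filename):
    with open(name + '/' + filename, newline='', encoding='utf-8') as f:
        return [(row[0], row[1]) for row in csv.reader(f) if row]  # (text, label)

def get_nb_test_dataset(name, filename):
    with open(name + '/' + filename, newline='', encoding='utf-8') as f:
        return [row[0] for row in csv.reader(f) if row]

def nb_train_and_valid_data(rows, valid_ratio=0.2):
    split = int(len(rows) * (1 - valid_ratio))
    train_data = [text for text, _ in rows[:split]]
    train_target = [label for _, label in rows[:split]]
    valid_data = [text for text, _ in rows[split:]]
    valid_target = [label for _, label in rows[split:]]
    return train_data, train_target, valid_data, valid_target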
Example no. 4
def workCloud(name):
    # print(name)
    fileN = db.searchFile(name)
    # print(fileN)
    fileN = fileN[0]['filename']
    # print(fileN)
    if fileN == "dont_have_file":
        ret = {"route": "nofile"}
        return json.dumps(ret)
    txtfile = fileN
    print(txtfile)
    with open(name + '/' + txtfile, encoding='gb18030') as f:
        text = f.read()
    print(text)
    # Read the whole text.
    # # get the path of the current project folder
    # d=path.dirname(__file__)
    # text = open(path.join(d, txtfile))
    # jieba word segmentation
    wordlist = jieba.cut(text, cut_all=True)
    wl = " ".join(wordlist)
    print(wl)  # print the segmented text
    coloring = np.array(Image.open("wordcloud/background.jpg"))

    # set the stopwords
    stopwords = set(STOPWORDS)
    stopwords.add("said")

    # the mask parameter sets the shape of the word cloud
    # wc = WordCloud(background_color="white", max_words=2000, mask=coloring,
    #                 max_font_size=50, random_state=42,font_path='fangsong_GB2312.ttf')
    wc = WordCloud(background_color="white",
                   max_words=2000,
                   mask=coloring,
                   max_font_size=50,
                   random_state=42,
                   font_path='Hiragino Sans GB.ttc')
    wc.generate(wl)

    # create coloring from image
    image_colors = ImageColorGenerator(coloring)
    # show
    # with only mask set, the word cloud takes the shape of the image
    # plt.imshow(wc, interpolation="bilinear")
    # plt.axis("off")
    # plt.figure()
    # plt.show()
    wc.to_file(name + '/' + "workcloud.png")
    ret = {"route": 'workcloud.png'}
    return json.dumps(ret)
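
Note that image_colors is computed above but never applied to the cloud. If the intent was to color the words from the background image, a short hedged sketch using the same objects would be:

# Hedged sketch: recolor the generated cloud with the colors of the
# background image before writing it out (WordCloud.recolor is part of
# the wordcloud library).
wc.recolor(color_func=image_colors)
wc.to_file(name + '/' + "workcloud.png")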
Example no. 5
def MIC(name):
    # pip install minepy
    fileN = db.searchFile(name)
    isfile = fileN[0]['filename']
    # print(fileN)
    if isfile == "dont_have_file":
        ret = {"route": "nofile"}
        return json.dumps(ret)

    if len(fileN) < 2:
        ret = {"route": 'Two files must be uploaded for the MIC computation'}
        return json.dumps(ret)
    x = []
    y = []
    file1 = fileN[0]['filename']
    file2 = fileN[1]['filename']
    csvFile1 = open(name + '/' + file1, encoding='utf-8-sig')
    csv_file1 = csv.reader(csvFile1)
    for content in csv_file1:
        print(content)
        content = list(map(float, content))
        if len(content) != 0:
            x.append(float(content[0]))
    csvFile1.close()
    print('x=', x)
    csvFile2 = open(name + '/' + file2, encoding='utf-8-sig')
    csv_file2 = csv.reader(csvFile2)
    for content in csv_file2:
        content = list(map(float, content))
        if len(content) != 0:
            y.append(float(content[0]))
    csvFile2.close()
    print('y=', y)
    mine = MINE(alpha=0.6, c=15)
    mine.compute_score(x, y)
    print("MIC", mine.mic())
    # write the MIC value to a file
    with open(name + '/' + 'MIC_result.csv', 'w', newline='') as new_file:
        csv_writer = csv.writer(new_file)
        csv_writer.writerow(["MIC result"])
        data = []
        data.append(str(mine.mic()))
        csv_writer.writerow(data)
    ret = {"route": 'MIC_result.csv'}
    return json.dumps(ret)
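
As a quick standalone sketch of what MIC reports, assuming minepy is installed and using synthetic data: a noiseless functional relationship pushes the score towards 1, while independent noise stays much lower.

# Hedged sketch: MIC on synthetic data, using the same MINE API as above.
import numpy as np
from minepy import MINE

x = np.linspace(0, 1, 200)
mine = MINE(alpha=0.6, c=15)
mine.compute_score(x, x ** 2)  # deterministic relationship
print("MIC (functional):", mine.mic())  # close to 1
mine.compute_score(x, np.random.rand(200))  # unrelated noise
print("MIC (noise):", mine.mic())  # much lower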
Example no. 6
def randomForest(name):
    fileN = db.searchFile(name)
    isfile = fileN[0]['filename']
    # print(fileN)
    if isfile == "dont_have_file":
        ret = {"route": "nofile"}
        return json.dumps(ret)
    if len(fileN) < 2:
        ret = {"route": 'A training set and a test set must be uploaded for model training and testing'}
        return json.dumps(ret)
    train = []
    test = []
    train_file = fileN[0]['filename']
    test_file = fileN[1]['filename']
    print(train_file)
    print(test_file)
    train = get_train_dataset(name, train_file)
    test = get_test_dataset(name, test_file)
    print('train=', train)
    print('test=', test)
    train_data, train_target, valid_data, valid_target = train_valid_data(
        train)
    # Create the model with 100 trees
    model = RandomForestClassifier(n_estimators=100,
                                   bootstrap=True,
                                   max_features='sqrt')
    # Fit on training data
    model.fit(train_data, train_target)
    print("train score:", model.score(train_data, train_target))
    print("valid score:", model.score(valid_data, valid_target))
    predict = model.predict(test)
    print(predict)
    p_list = []
    for p in predict:
        list_tem = []
        list_tem.append(p)
        p_list.append(list_tem)
    print(p_list)
    with open(name + '/' + 'RandomForest.csv', 'w', newline='') as new_file:
        csv_writer = csv.writer(new_file)
        for line in p_list:
            # print(line)
            csv_writer.writerow(list(line))
    ret = {"route": 'RandomForest.csv'}
    return json.dumps(ret)
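
get_train_dataset, get_test_dataset and train_valid_data are shared helpers defined elsewhere in the repository; the regression, SVR and decision-tree examples below reuse them. A hypothetical sketch, assuming numeric CSV rows where the last column of each training row is the target and the last 20% of the rows are held out for validation:

# Hypothetical shared dataset helpers (the real repository versions may differ).
import csv

def get_train_dataset(name, filename):
    with open(name + '/' + filename, newline='') as f:
        return [list(map(float, row)) for row in csv.reader(f) if row]

def get_test_dataset(name, filename):
    with open(name + '/' + filename, newline='') as f:
        return [list(map(float, row)) for row in csv.reader(f) if row]

def train_valid_data(rows, valid_ratio=0.2):
    split = int(len(rows) * (1 - valid_ratio))
    train_data = [r[:-1] for r in rows[:split]]
    train_target = [r[-1] for r in rows[:split]]
    valid_data = [r[:-1] for r in rows[split:]]
    valid_target = [r[-1] for r in rows[split:]]
    return train_data, train_target, valid_data, valid_target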
Example no. 7
def linearRegression(name):
    fileN = db.searchFile(name)
    isfile = fileN[0]['filename']
    # print(fileN)
    if isfile == "dont_have_file":
        ret = {"route": "nofile"}
        return json.dumps(ret)
    if len(fileN) < 2:
        ret = {"route": 'A training set and a test set must be uploaded for model training and testing'}
        return json.dumps(ret)
    train = []
    test = []
    train_file = fileN[0]['filename']
    test_file = fileN[1]['filename']
    print(train_file)
    print(test_file)
    train = get_train_dataset(name, train_file)
    test = get_test_dataset(name, test_file)
    print('train=', train)
    print("\n")
    train_data, train_target, valid_data, valid_target = train_valid_data(
        train)
    linreg = LinearRegression()
    model = linreg.fit(train_data, train_target)
    print(model)
    # model intercept after training
    print(linreg.intercept_)
    # model weights after training (one coefficient per feature)
    print(linreg.coef_)
    predict = linreg.predict(test)
    p_list = []
    for p in predict:
        list_tem = []
        list_tem.append(p)
        p_list.append(list_tem)
    print(p_list)
    with open(name + '/' + 'linearRegression.csv', 'w',
              newline='') as new_file:
        csv_writer = csv.writer(new_file)
        for line in p_list:
            # print(line)
            csv_writer.writerow(list(line))
    ret = {"route": 'linearRegression.csv'}
    return json.dumps(ret)
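
Unlike the classifier examples, this function never evaluates the validation split it computes. A short hedged sketch of scoring it with the same variables (score on a fitted LinearRegression returns the R^2 coefficient):

# Hedged sketch: evaluate the held-out split produced by train_valid_data.
print("train R^2:", linreg.score(train_data, train_target))
print("valid R^2:", linreg.score(valid_data, valid_target))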
Example no. 8
def svr(name):
    fileN = db.searchFile(name)
    isfile = fileN[0]['filename']
    # print(fileN)
    if isfile == "dont_have_file":
        ret = {"route": "nofile"}
        return json.dumps(ret)
    if len(fileN) < 2:
        ret = {"route": 'A training set and a test set must be uploaded for model training and testing'}
        return json.dumps(ret)
    train = []
    test = []
    train_file = fileN[0]['filename']
    test_file = fileN[1]['filename']
    print(train_file)
    print(test_file)
    train = get_train_dataset(name, train_file)
    test = get_test_dataset(name, test_file)
    print('train=', train)
    print('test=', test)
    train_data, train_target, valid_data, valid_target = train_valid_data(
        train)
    svrlassifier = SVR(kernel='rbf', C=20)  # the SVR regularization parameter is uppercase C
    # Fit on training data
    svrlassifier.fit(train_data, train_target)
    print("train score:", svrlassifier.score(train_data, train_target))
    print("valid score:", svrlassifier.score(valid_data, valid_target))
    predict = svrlassifier.predict(test)
    print(predict)
    p_list = []
    for p in predict:
        list_tem = []
        list_tem.append(p)
        p_list.append(list_tem)
    print(p_list)
    with open(name + '/' + 'svr.csv', 'w', newline='') as new_file:
        csv_writer = csv.writer(new_file)
        for line in p_list:
            # print(line)
            csv_writer.writerow(list(line))
    ret = {"route": 'svr.csv'}
    return json.dumps(ret)
Example no. 9
def decisionTree(name):
    fileN = db.searchFile(name)
    isfile = fileN[0]['filename']
    # print(fileN)
    if isfile == "dont_have_file":
        ret = {"route": "nofile"}
        return json.dumps(ret)
    if len(fileN) < 2:
        ret = {"route": 'A training set and a test set must be uploaded for model training and testing'}
        return json.dumps(ret)
    train = []
    test = []
    train_file = fileN[0]['filename']
    test_file = fileN[1]['filename']
    print(train_file)
    print(test_file)
    train = get_train_dataset(name, train_file)
    test = get_test_dataset(name, test_file)
    # print('test=',test)
    train_data, train_target, valid_data, valid_target = train_valid_data(
        train)
    model = tree.DecisionTreeClassifier(criterion='gini')
    # print('train_data=',train_data)
    # print('train_target=',train_target)
    model.fit(train_data, train_target)
    print("train score:", model.score(train_data, train_target))
    print("valid score:", model.score(valid_data, valid_target))
    predict = model.predict(test)
    print(predict)
    p_list = []
    for p in predict:
        list_tem = []
        list_tem.append(p)
        p_list.append(list_tem)
    print(p_list)
    with open(name + '/' + 'DecisionTree.csv', 'w', newline='') as new_file:
        csv_writer = csv.writer(new_file)
        for line in p_list:
            # print(line)
            csv_writer.writerow(list(line))
    ret = {"route": 'DecisionTree.csv'}
    return json.dumps(ret)
Example no. 10
def pca(name):
    pca_dim = request.get_json()  # parsed JSON body of the request
    data = []
    traffic_feature = []
    traffic_target = []
    # fileN = file_name.pop()
    # print(name)
    fileN = db.searchFile(name)
    # print(fileN)
    fileN = fileN[0]['filename']
    # print(fileN)
    if fileN == "dont_have_file":
        ret = {"route": "nofile"}
        return json.dumps(ret)
    csvFile = open(name + '/' + fileN)
    csv_file = csv.reader(csvFile)
    for content in csv_file:
        content = list(map(float, content))
        if len(content) != 0:
            data.append(content)
            traffic_feature.append(content[0:-2])
            traffic_target.append(content[-1])
    csvFile.close()
    min_max_scaler = preprocessing.MinMaxScaler()
    traffic_feature = min_max_scaler.fit_transform(traffic_feature)
    # print('data=',data)
    # print('traffic_feature=',traffic_feature)
    # print('traffic_target=',traffic_target)
    dim = int(pca_dim["pca"])
    sklearn_pca = sklearnPCA(n_components=dim)
    sklearn_transf = sklearn_pca.fit_transform(traffic_feature)
    # print(sklearn_transf)
    with open(name + '/' + 'PCA_' + fileN, 'w', newline='') as new_file:
        csv_writer = csv.writer(new_file)
        for line in sklearn_transf:
            # print(line)
            csv_writer.writerow(line)
    ret = {"route": 'PCA_' + fileN}
    return json.dumps(ret)
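
If the caller also needs to know how much variance the selected components retain, a hedged addition using the already fitted PCA object (explained_variance_ratio_ is a standard attribute of scikit-learn's PCA):

# Hedged sketch: report the variance retained by the chosen components.
print("explained variance ratio:", sklearn_pca.explained_variance_ratio_)
print("total retained:", sklearn_pca.explained_variance_ratio_.sum())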
Example no. 11
def kmeans(name):
    cluster_dim = request.get_json()  #bytes
    data = []
    cluster = int(cluster_dim["kmeans"])
    # print(name)
    fileN = db.searchFile(name)
    # print(fileN)
    fileN = fileN[0]['filename']
    # print(fileN)
    if fileN == "dont_have_file":
        ret = {"route": "nofile"}
        return json.dumps(ret)
    with open(name + '/' + fileN, 'r', newline='') as csvFile:
        csv_file = csv.reader(csvFile)
        for content in csv_file:
            content = list(map(float, content))
            if len(content) != 0:
                data.append(content)
    estimator = KMeans(n_clusters=cluster)  # construct the clustering estimator
    estimator.fit(data)  # run the clustering
    label_pred = estimator.labels_  # cluster label of each sample
    centroids = estimator.cluster_centers_  # cluster centers
    with open(name + '/' + 'kmeans_' + fileN, 'w', newline='') as new_file:
        csv_writer = csv.writer(new_file)
        csv_writer.writerow(["label"])
        for line in label_pred:
            # print(line)
            temp = str(line)
            # print(temp)
            csv_writer.writerow([temp])
        csv_writer.writerow(["clusterCenter"])
        for line in centroids:
            # print(line)
            csv_writer.writerow(line)

    ret = {"route": 'kmeans_' + fileN}
    return json.dumps(ret)
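
If the cluster count sent by the client is only a guess, a hedged sketch of the usual elbow check over the same data (inertia_ is KMeans' within-cluster sum of squares):

# Hedged sketch: inspect the inertia for a range of k to sanity-check the
# client-supplied cluster count; look for the point where the decrease flattens.
for k in range(2, 10):
    km = KMeans(n_clusters=k).fit(data)
    print(k, km.inertia_)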
Example no. 12
def polydata(name):
    fileN = db.searchFile(name)
    # print(fileN)
    fileN = fileN[0]['filename']
    # print(fileN)
    if fileN == "dont_have_file":
        ret = {"route": "nofile"}
        return json.dumps(ret)
    csv_file = fileN
    csv_data = pd.read_csv(name + '/' + csv_file, low_memory=False)  # avoid the mixed-dtype warning
    data = pd.DataFrame(csv_data)

    for i in data.columns:
        for j in range(len(data)):
            if (np.isnan(data[i][j])):
                data[i][j] = ploy(data[i], j)
                # print(data[i][j])

    data.to_csv(name + '/' + csv_file[0:-4] + '.csv',
                index=False,
                header=False)
    print('Processing complete')
    ret = {"route": csv_file[0:-4] + '.csv'}
    return json.dumps(ret)
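
ploy is an interpolation helper defined elsewhere in the repository. A hypothetical sketch of what it could look like, assuming it fills a missing cell by fitting a low-degree polynomial to the non-NaN entries of the column and evaluating it at the missing index:

# Hypothetical sketch of the ploy helper (the real repository version may differ).
import numpy as np

def ploy(column, index, degree=2):
    values = column.values.astype(float)
    known = ~np.isnan(values)
    coeffs = np.polyfit(np.flatnonzero(known), values[known], degree)
    return float(np.polyval(coeffs, index))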
Example no. 13
def dtw(name):
    fileN = db.searchFile(name)
    isfile = fileN[0]['filename']
    # print(fileN)
    if isfile == "dont_have_file":
        ret = {"route": "nofile"}
        return json.dumps(ret)
    if len(fileN) < 2:
        ret = {"route": 'Two files must be uploaded for time-series DTW analysis'}
        return json.dumps(ret)
    data1 = []
    data2 = []
    file1 = fileN[0]['filename']
    file2 = fileN[1]['filename']
    csvFile1 = open(name + '/' + file1)
    csv_file1 = csv.reader(csvFile1)
    for content in csv_file1:
        content = list(map(float, content))
        if len(content) != 0:
            data1.append(float(content[0]))
    csvFile1.close()
    print('data1=', data1)
    csvFile2 = open(name + '/' + file2)
    csv_file2 = csv.reader(csvFile2)
    for content in csv_file2:
        content = list(map(float, content))
        if len(content) != 0:
            data2.append(float(content[0]))
    csvFile2.close()
    print('data2=', data2)
    r, c = len(data1), len(data2)
    D0 = zeros((r + 1, c + 1))
    D0[0, 1:] = inf
    D0[1:, 0] = inf
    D1 = D0[1:, 1:]
    # D1 is a view into D0 (shallow copy)
    # print(D1)
    for i in range(r):
        for j in range(c):
            D1[i, j] = euclidean_distances([[data1[i]]], [[data2[j]]])[0][0]  # euclidean_distances expects 2D inputs
    # build the raw (pointwise) distance matrix
    M = D1.copy()
    for i in range(r):
        for j in range(c):
            D1[i, j] += min(D0[i, j], D0[i, j + 1], D0[i + 1, j])
    # core of the algorithm: dynamic programming over the cumulative distance
    i, j = array(D0.shape) - 2
    # backtrack the shortest warping path
    # print(i, j)
    p, q = [i], [j]
    while (i > 0 or j > 0):
        tb = argmin((D0[i, j], D0[i, j + 1], D0[i + 1, j]))
        if tb == 0:
            i -= 1
            j -= 1
        elif tb == 1:
            i -= 1
        else:
            j -= 1
        p.insert(0, i)
        q.insert(0, j)
    print(M)
    # raw distance matrix
    print(list(zip(p, q)))
    # warping path
    print(D1)
    # cost matrix (accumulated distance matrix)
    print(D1[-1, -1])
    dis = []
    dis.append(D1[-1, -1])
    print(dis)
    # sequence distance
    with open(name + '/' + 'DTW_result.csv', 'w', newline='') as new_file:
        csv_writer = csv.writer(new_file)
        csv_writer.writerow(["Cost Matrix"])
        for line in D1:
            # print(line)
            csv_writer.writerow(line)
        csv_writer.writerow(["Sequence distance"])
        csv_writer.writerow(dis)
    ret = {"route": 'DTW_result.csv'}
    return json.dumps(ret)
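
As a quick worked sketch of the same dynamic program on two tiny hand-written sequences, using the absolute difference as the pointwise distance (which equals the Euclidean distance for scalars):

# Hedged sketch: the DTW recurrence on two short sequences.
import numpy as np

a = [1.0, 2.0, 3.0, 4.0]
b = [1.0, 3.0, 4.0]
D = np.full((len(a) + 1, len(b) + 1), np.inf)
D[0, 0] = 0.0
for i in range(1, len(a) + 1):
    for j in range(1, len(b) + 1):
        cost = abs(a[i - 1] - b[j - 1])
        D[i, j] = cost + min(D[i - 1, j - 1], D[i - 1, j], D[i, j - 1])
print("DTW distance:", D[-1, -1])  # 1.0 for these two sequences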