コード例 #1
0
def GetReviewData(workDir):
    """Divide every ``.txt`` review file in *workDir*, then process all products.

    Sets the module global ``sourceName`` to the last backslash-separated
    component of *workDir*, feeds the content of each readable ``.txt`` file
    to ``Dividing`` and finally hands the accumulated status text to
    ``WordSimilarity.ProcessAllProduct``.

    Args:
        workDir: Directory path written with backslash separators.

    Returns:
        Whatever ``WordSimilarity.ProcessAllProduct`` returns.
    """
    global sourceName

    sourceName = workDir.split('\\')[-1]
    targetFileList = [
        file for file in os.listdir(workDir) if file.endswith('.txt')
    ]

    outPut = ''
    total = len(targetFileList)
    for number, file in enumerate(targetFileList, start=1):
        title = 'Now dividing reviews (' + str(number) + '/' + str(total) + ')'
        try:
            fileDes = open(workDir + "\\" + file, 'r', encoding="utf-8")
        except OSError:
            # Report the unreadable file and carry on with the next one.
            outPut += 'fail to open ' + file + '\n' + '\n'
            main.ShowTitle(title, outPut)
            continue

        # Guarantee the handle is released even if reading or dividing raises.
        with fileDes:
            outPut += Dividing(FileManager.FileReader(fileDes), file, title,
                               outPut)

    WordSimilarity.baseDir = baseDir
    return WordSimilarity.ProcessAllProduct(title=None, outPut=outPut)
コード例 #2
0
def BuildWordDic(workDir):
    """Append every ``.txt`` article file in *workDir* to the article dictionary.

    Each readable file is passed through ``FileManager.FileReader`` and
    ``AppendArticleDic``; the accumulated status text is shown once at the
    end and forwarded to ``WordSimilarity.ProcessArticle``.

    Args:
        workDir: Directory path written with backslash separators.

    Returns:
        Whatever ``WordSimilarity.ProcessArticle`` returns.
    """
    targetFileList = [
        file for file in os.listdir(workDir) if file.endswith('.txt')
    ]

    outPut = ''
    # Bound before the loop so the final ShowTitle call also works when the
    # directory holds no ``.txt`` file or none of them could be opened.
    title = ''
    total = len(targetFileList)
    for number, file in enumerate(targetFileList, start=1):
        try:
            fileDes = open(workDir + "\\" + file, 'r', encoding="utf-8")
        except OSError:
            outPut += 'fail to open ' + file + '\n' + '\n'
            continue

        title = ('Now processing articles (' + str(number) + '/' +
                 str(total) + ')')
        # Guarantee the handle is released even if reading or appending raises.
        with fileDes:
            outPut += AppendArticleDic(FileManager.FileReader(fileDes),
                                       file.split('.')[0], title, outPut)

    main.ShowTitle(title, outPut)

    WordSimilarity.baseDir = baseDir
    return WordSimilarity.ProcessArticle(outPut=outPut, title=None)
コード例 #3
0
def DoManyQuery(queryList, db=None, title=None, outPut=None, queryType=None):
    """Send *queryList* to the database in ``;``-joined batches.

    The list is cut into slices of ``maximumQueryStactUnit`` statements; each
    slice is joined with ``;`` and passed to ``DoSQL``. A progress line is
    shown through ``main.ShowTitle`` at most once per second.

    Args:
        queryList: Sequence of SQL statement strings. ``None`` or an empty
            sequence means there is nothing to send.
        db: Forwarded unchanged as the second argument of ``DoSQL``.
        title: Title passed to ``main.ShowTitle`` (defaults to ``''``).
        outPut: Text shown in front of the progress line (defaults to ``''``).
        queryType: Label inserted into the progress line (defaults to ``''``).
    """
    if title is None:
        title = ''
    if outPut is None:
        outPut = ''
    if queryType is None:
        queryType = ''

    if not queryList:
        return

    batchCount = math.ceil(len(queryList) / maximumQueryStactUnit)
    updateTime = 0
    for index in range(batchCount):
        # Second-resolution timestamp: refresh the display once per second.
        currentTime = int(datetime.datetime.now().strftime('%Y%m%d%H%M%S'))
        if updateTime < currentTime:
            updateTime = currentTime
            main.ShowTitle(title, outPut + 'Sending ' + queryType + ' query (' + str(index) + '/' + str(batchCount) + ')')
        start = index * maximumQueryStactUnit
        # Slicing past the end is safe, so the last batch may be shorter.
        Query = ';'.join(queryList[start:start + maximumQueryStactUnit])
        DoSQL(Query, db)
コード例 #4
0
def GetSimilarity(target=None, title=None, outPut=None):
    """Train the global FastText model on ``contents`` and save it to disk.

    When no model is loaded a new one is created and trained; when a model
    is already loaded it is updated only if ``contents`` is non-empty. The
    model is saved under ``baseDir\\WordVectorData\\<Normal|target>\\`` using
    the current timestamp as the file name.

    Args:
        target: Identifier used as the sub-directory name; ``None`` selects
            the ``Normal`` directory.
        title: Optional title; the stage name is appended on a new line.
        outPut: Optional name inserted into the status text.
    """
    global contents
    global baseDir
    global embedding_model

    stageName = 'Building similarity data'
    title = stageName if title is None else title + '\n' + stageName
    outPut = '' if outPut is None else 'Process data of ' + outPut + '\n'

    timeStamp = str(datetime.datetime.now().strftime('%Y#%m#%d&%H#%M#%S'))
    subDir = 'Normal' if target is None else str(target)
    saveDir = (baseDir + '\\WordVectorData\\' + subDir + '\\' + timeStamp +
               '.fasttext')

    main.ShowTitle(title, outPut)

    if embedding_model is None:
        # No model loaded: build a fresh vocabulary.
        embedding_model = FastText(size=30,
                                   window=3,
                                   min_count=5,
                                   workers=4,
                                   sg=1)
        embedding_model.build_vocab(contents)
    elif len(contents) > 0:
        # Model already loaded: extend its vocabulary with the new content.
        embedding_model.build_vocab(contents, update=True)
    else:
        # Loaded model and nothing new to learn from.
        return

    embedding_model.train(contents,
                          total_examples=embedding_model.corpus_count,
                          epochs=embedding_model.epochs)
    embedding_model.save(saveDir)
コード例 #5
0
def SelectProduct(Mode, outPut=None, title=None):
    """Interactive console flow for browsing related words.

    In ``'Product'`` mode the user searches for a product, picks one from
    the numbered result list, the similarity data for that product is
    rebuilt and the user can then query related words. In any other mode
    the article-based ("Normal") data is processed and queried instead.

    Args:
        Mode: ``'Product'`` for the product flow; anything else selects the
            article flow.
        outPut: Status text shown on the first screen (defaults to ``''``).
        title: Title passed to ``main.ShowTitle`` (defaults to ``''``).

    Returns:
        ``''`` when the user backs out, the result of ``ProcessAllProduct()``
        when ``%a`` is entered, or ``'There is no data'`` when the article
        table is empty.
    """
    if title is None:
        title = ''
    if outPut is None:
        outPut = ''

    invalidInput = 'Please enter correct number or charactor'

    if Mode == 'Product':
        while True:
            main.ShowTitle(title, outPut)
            inputValue = input(
                'Enter product Name (%q to back %a to process all): ')

            if inputValue == '%q':
                return ''

            if inputValue == '%a':
                return ProcessAllProduct()

            productList = ReviewDivider.GetProductName(inputValue).get(
                'product_Name')
            main.ShowTitle(title, 'Data for ' + inputValue)
            if len(productList) > 0:
                for number, product in enumerate(productList, start=1):
                    print(str(number) + '. ' + product)
            else:
                print('No result')

            print('')
            print('r. Re-enter name')
            print('b. Back')

            inputValue = input("=> ")

            if inputValue == 'r':
                outPut = ''
                continue
            elif inputValue == 'b':
                return ''

            try:
                selected = int(inputValue)
            except ValueError:
                outPut = invalidInput
                continue
            # Reject 0 and negatives explicitly: Python would otherwise index
            # from the end of the list and pick an unintended product.
            if not 1 <= selected <= len(productList):
                outPut = invalidInput
                continue
            productName = productList[selected - 1]

            # NOTE(review): the product name is concatenated into the SQL
            # text; a name containing a quote would break the statement.
            try:
                sqlResult = DataBaseManager.DoSQL("""
                SELECT  Product_ID
                FROM    product_dic
                WHERE   Product_Name = '""" + productName + """'
                """)
            except Exception:
                # Keep the menu alive on a failed lookup, as before.
                outPut = invalidInput
                continue
            if not sqlResult or not sqlResult[0]:
                outPut = invalidInput
                continue
            targetID = sqlResult[0][0]

            tableName = DataBaseManager.DoSQL("""
            SELECT  Relation_Table_Name
            FROM    product_dic
            WHERE   Product_ID = """ + str(targetID) + """
            LIMIT 1
            """)[0][0]
            if tableName is None:
                # tableName is None here, so report the product name instead.
                outPut = 'No review for ' + productName
                continue

            GetContent(targetID, title=title, outPut=productName)
            GetSimilarity(targetID, title=title, outPut=productName)

            if len(contents) > 0:
                UpdateSimilarityDatabase(targetID,
                                         title=title,
                                         outPut=productName)

            _RelatedWordLoop(tableName)
            outPut = ''
    else:
        articleCount = DataBaseManager.DoSQL("""
        SELECT  COUNT(*)
        FROM    article_dic
        """)[0][0]
        if articleCount <= 0:
            return 'There is no data'

        ProcessArticle()

        _RelatedWordLoop('Normal')
        # Return an empty status like every other "back" exit of this
        # function, instead of falling off the end with None.
        return ''


def _RelatedWordLoop(tableName):
    """Prompt for target words until ``%q`` and show their related words."""
    outPut = ''
    while True:
        main.ShowTitle('', outPut)
        inputValue = input('Enter target word (%q to back): ')

        if inputValue == '%q':
            return

        outPut = GetRelatedWord(tableName, inputValue)
コード例 #6
0
def UpdateSimilarityDatabase(target=None, title=None, outPut=None):
    """Write the similarities of the global FastText model to the database.

    Without *target* the vocabulary is reduced to the words for which
    ``NLP.DoNLP(word, ['VA', 'VV'])`` returns something, and one INSERT or
    UPDATE statement on ``similar_word_relation`` is generated per pair of
    remaining words. With *target* the product's relation table is updated
    for the product's table name and every feature of its category, together
    with the sentiment value of each similar word.

    All statements are sent in bulk through ``DataBaseManager.DoManyQuery``.

    Args:
        target: Product ID, or ``None`` for the general word relations.
        title: Optional title; the stage name is appended on a new line.
        outPut: Optional name inserted into the status text.
    """
    global embedding_model

    if title is None:
        title = 'Append Similar word relation'
    else:
        title += '\nAppend Similar word relation'
    if outPut is None:
        outPut = ''
    else:
        outPut = 'Process data of ' + outPut + '\n'

    main.ShowTitle(title, outPut + 'Getting exist data')

    insertQuery = []
    updateQuery = []

    if target is None:
        # word -> {related word -> Similar_Relation_ID} for existing rows.
        relationDict = {}
        relationList = DataBaseManager.DoSQL("""
        SELECT  Normal_Word, Target_Word, Similar_Relation_ID
        FROM    similar_word_relation
        """)
        for normalWord, targetWord, relationID in relationList:
            # setdefault makes the result independent of row order: a
            # self-relation row neither has to come first nor wipes the
            # entries that were already collected for that word.
            knownRelations = relationDict.setdefault(normalWord, {})
            if normalWord != targetWord:
                knownRelations[targetWord] = relationID

        main.ShowTitle(title,
                       outPut + 'Getting latest calculated similar data')
        vocabulary = list(embedding_model.wv.index2word)

        wordList = []
        removedCount = 0
        updateTime = 0
        # A for-loop also copes with an empty vocabulary.
        for index, word in enumerate(vocabulary):
            currentTime = int(
                datetime.datetime.now().strftime('%Y%m%d%H%M%S'))
            if updateTime < currentTime:
                updateTime = currentTime
                main.ShowTitle(
                    title, outPut + 'Removing not verb and adjective (' +
                    str(index) + '/' + str(len(vocabulary)) + ' removed: ' +
                    str(removedCount) + ')')
            targetTag = ['VA', 'VV']
            if len(NLP.DoNLP(word, targetTag)) <= 0:
                removedCount += 1
            else:
                wordList.append(word)

        wordDict = dict.fromkeys(wordList)

        topn = len(embedding_model.wv.index2word) - 1
        updateTime = 0
        for index, word in enumerate(wordDict):
            result = embedding_model.most_similar(positive=[word], topn=topn)
            currentTime = int(
                datetime.datetime.now().strftime('%Y%m%d%H%M%S'))
            if updateTime < currentTime:
                updateTime = currentTime
                main.ShowTitle(
                    title, outPut + 'Append query (' + str(index) + '/' +
                    str(len(wordList)) + ')')
            existData = relationDict.get(word, {})
            for similar in result:
                # Only relations between two retained words are stored.
                if similar[0] not in wordDict:
                    continue
                if similar[0] in existData:
                    newQuery = """
                    UPDATE  similar_word_relation
                    SET     Similar_Value = """ + str(similar[1]) + """
                    WHERE   Similar_Relation_ID = """ + str(
                        existData[similar[0]])
                    updateQuery.append(newQuery)
                else:
                    newQuery = """
                    INSERT INTO similar_word_relation (Normal_Word, Target_Word, Similar_Value)
                    VALUES      ('""" + word + """', '""" + similar[
                        0] + """', """ + str(similar[1]) + """)"""
                    insertQuery.append(newQuery)

        db = 'db_capstone'
    else:
        sqlResult = DataBaseManager.DoSQL("""
        SELECT  Category_ID, Relation_Table_Name
        FROM    product_dic
        WHERE   Product_ID = """ + str(target) + """
        """)
        productInfo = sqlResult[0]

        # The relation table name comes first, followed by the features of
        # the product's category.
        featureList = [productInfo[1]]
        sqlResult = DataBaseManager.DoSQL("""
        SELECT  Feature_Name
        FROM    feature_dic
        WHERE   Category_ID = """ + str(productInfo[0]) + """
        """)
        for result in sqlResult:
            featureList.append(result[0])

        SentiWordBinder = BindSentiWords.BindSentiWords()
        topn = len(embedding_model.wv.index2word) - 1
        updateTime = 0
        for index, feature in enumerate(featureList):
            result = embedding_model.most_similar(positive=[feature],
                                                  topn=topn)
            currentTime = int(
                datetime.datetime.now().strftime('%Y%m%d%H%M%S'))
            if updateTime < currentTime:
                updateTime = currentTime
                main.ShowTitle(
                    title, outPut + 'Append query (' + str(index) + '/' +
                    str(len(featureList)) + ')')
            for similar in result:
                if feature != similar[0] and productInfo[1] != similar[0]:
                    updateQuery.append("""
                    UPDATE  `""" + productInfo[1] + """`
                    SET     `""" + feature + """` = """ + str(similar[1]) + """
                    WHERE   Word = '""" + similar[0] + """'
                    """)

                    sentiValueDict = SentiWordBinder.BindSentiWords(
                        [similar[0]])
                    # The binder marks a missing value with the string 'None'.
                    if sentiValueDict[similar[0]] != 'None':
                        updateQuery.append("""
                        UPDATE  `""" + productInfo[1] + """`
                        SET     Sentiment_Value = """ +
                                           str(sentiValueDict[similar[0]]) +
                                           """
                        WHERE   Word = '""" + similar[0] + """'
                        """)

            # Clear the feature column for words counted five times or fewer.
            updateQuery.append("""
            UPDATE  `""" + productInfo[1] + """`
            SET     `""" + feature + """` = null
            WHERE   Word_Count <= """ + str(5) + """
            """)

        db = 'db_capstone_similarity'

    DataBaseManager.DoManyQuery(insertQuery,
                                db=db,
                                title=title,
                                outPut=outPut,
                                queryType='INSERT')
    DataBaseManager.DoManyQuery(updateQuery,
                                db=db,
                                title=title,
                                outPut=outPut,
                                queryType='UPDATE')
コード例 #7
0
def UpdateSimilarWordDictionary(title=None, outPut=None):
    """Rebuild ``similar_word_dic`` from highly similar word relations.

    Reads every relation with ``Similar_Value > 0.95``, groups the rows by
    ``Normal_Word``, drops words that have at most one relation, and pairs
    words whose sentiment values are equal: the word with the smaller
    ``Word_Count`` becomes the sub word of the other. Chains of sub/super
    words are collapsed, then INSERT/UPDATE statements are generated and
    sent through ``DataBaseManager.DoManyQuery``.

    Args:
        title: Optional title; the stage name is appended on a new line.
        outPut: Optional name inserted into the status text.
    """
    if title is None:
        title = 'Update Similar word Dictionary'
    else:
        title += '\nUpdate Similar word Dictionary'
    if outPut is None:
        outPut = ''
    else:
        outPut = 'Process data of ' + outPut + '\n'

    main.ShowTitle(title, 'Getting similarity data')
    sqlResult = DataBaseManager.DoSQL("""
    SELECT  Normal_Word, Target_Word, Word_Count
    FROM    similar_word_relation
    WHERE   Similar_Value > 0.95
    ORDER BY Similar_Value DESC
    """)

    # Normal_Word -> {Target_Word -> Word_Count}
    wordDict = {}
    updateTime = 0
    for index, result in enumerate(sqlResult):
        currentTime = int(datetime.datetime.now().strftime('%Y%m%d%H%M%S'))
        if updateTime < currentTime:
            updateTime = currentTime
            main.ShowTitle(
                title, outPut + 'Building relation dictionary (' + str(index) +
                '/' + str(len(sqlResult)) + ')')
        wordDict.setdefault(result[0], {})[result[1]] = result[2]

    # Words with at most one relation have nothing to be merged with.
    initialLength = len(wordDict)
    removeList = []
    updateTime = 0
    for index, (key, relation) in enumerate(wordDict.items()):
        currentTime = int(datetime.datetime.now().strftime('%Y%m%d%H%M%S'))
        if updateTime < currentTime:
            updateTime = currentTime
            main.ShowTitle(
                title,
                outPut + 'Removing unnecessary word (' + str(index) + '/' +
                str(initialLength) + ' removed: ' + str(len(removeList)) + ')')
        if len(relation) <= 1:
            removeList.append(key)
    for key in removeList:
        wordDict.pop(key)

    # sub word -> super word
    relatedWordDict = {}
    SentiWordBinder = BindSentiWords.BindSentiWords()
    updateTime = 0
    for index, (key, value) in enumerate(wordDict.items()):
        currentTime = int(datetime.datetime.now().strftime('%Y%m%d%H%M%S'))
        if updateTime < currentTime:
            updateTime = currentTime
            main.ShowTitle(
                title, outPut + 'Calculating sentimental value (' +
                str(index) + '/' + str(len(wordDict)) + ')')

        keyCount = value.get(key)
        if keyCount is None:
            # No usable self-relation row: nothing to compare counts with.
            continue

        sentiValueDict = SentiWordBinder.BindSentiWords(list(value.keys()))
        # The binder marks a missing value with the string 'None'.
        keySentiValue = sentiValueDict.get(key, 'None')
        if keySentiValue == 'None':
            continue

        for word, targetSentiValue in sentiValueDict.items():
            if word == key or targetSentiValue == 'None':
                continue
            if int(keySentiValue) != int(targetSentiValue):
                continue
            wordCount = wordDict.get(word, {}).get(word)
            if wordCount is None:
                continue
            # The more frequent word becomes the super word; equal counts
            # create no relation.
            if keyCount > wordCount:
                relatedWordDict[word] = key
            elif keyCount < wordCount:
                relatedWordDict[key] = word

    # Collapse chains: when a super word is itself a sub word, point
    # everything that referenced it to its own super word. Only values of
    # existing keys are reassigned, so iterating the live view is safe.
    updateTime = 0
    for index, (subWord, superWord) in enumerate(relatedWordDict.items()):
        currentTime = int(datetime.datetime.now().strftime('%Y%m%d%H%M%S'))
        if updateTime < currentTime:
            updateTime = currentTime
            main.ShowTitle(
                title, outPut + 'Building similar word dictionary (' +
                str(index) + '/' + str(len(relatedWordDict)) + ')')
        if superWord not in relatedWordDict:
            continue
        upperWord = relatedWordDict[superWord]
        relatedWordDict[subWord] = upperWord
        for otherSub, otherSuper in relatedWordDict.items():
            if otherSuper == superWord:
                relatedWordDict[otherSub] = upperWord

    main.ShowTitle(title, outPut + 'Getting exist similar word dictionary')
    sqlResult = DataBaseManager.DoSQL("""
    SELECT  Sub_Word, Similar_ID
    FROM    similar_word_dic
    """)
    existRelatedWordDict = dict(sqlResult)
    insertQuery = []
    updateQuery = []

    updateTime = 0
    for index, (subWord, superWord) in enumerate(relatedWordDict.items()):
        currentTime = int(datetime.datetime.now().strftime('%Y%m%d%H%M%S'))
        if updateTime < currentTime:
            updateTime = currentTime
            main.ShowTitle(
                title, outPut + 'Appending Query (' + str(index) + '/' +
                str(len(relatedWordDict)) + ')')
        if subWord in existRelatedWordDict:
            # The SET clause needs the new super word to be valid SQL.
            updateQuery.append("""
            UPDATE  similar_word_dic
            SET     Super_Word = '""" + superWord + """'
            WHERE   Similar_ID = """ + str(existRelatedWordDict[subWord]) + """
            """)
        else:
            insertQuery.append("""
            INSERT INTO similar_word_dic (Sub_Word, Super_Word)
            VALUES ('""" + subWord + """', '""" + superWord + """')
            """)

    DataBaseManager.DoManyQuery(insertQuery,
                                title=title,
                                outPut=outPut,
                                queryType='INSERT')
    DataBaseManager.DoManyQuery(updateQuery,
                                title=title,
                                outPut=outPut,
                                queryType='UPDATE')
コード例 #8
0
def GetContent(target=None, title=None, outPut=None):
    """Load the newest saved FastText model, if current, and collect text.

    Resets the global ``embedding_model`` to ``None``, looks up the date of
    the newest article (or of the newest review of *target*), and loads the
    most recent ``.fasttext`` file from the model directory when its
    timestamp is later than that date. Texts dated after the newest model
    file are split on ``#`` and appended to the global ``contents`` list.

    Args:
        target: Product ID, or ``None`` to work on articles.
        title: Optional title; the stage name is appended on a new line.
        outPut: Optional name inserted into the status text.

    Returns:
        Always ``''``.
    """
    global contents
    global embedding_model
    global baseDir

    embedding_model = None

    if title is None:
        title = 'Get data for Analyze similarity'
    else:
        title += '\nGet data for Analyze similarity'
    if outPut is None:
        outPut = ''
    else:
        outPut = 'Process data of ' + outPut + '\n'

    main.ShowTitle(title, outPut + "Find exist data")
    if target is None:
        sqlResult = DataBaseManager.DoSQL("""
        SELECT  Date
        FROM    article_dic
        ORDER BY Article_ID DESC LIMIT 1
        """)
    else:
        sqlResult = DataBaseManager.DoSQL("""
        SELECT  Date
        FROM    review_dic
        WHERE   Product_ID = """ + str(target) + """
        ORDER BY Review_ID DESC LIMIT 1
        """)
    # An empty table yields no row; treat that as "no data date known".
    lastUpdateDate = sqlResult[0][0] if sqlResult else None

    if target is None:
        targetDir = baseDir + '\\WordVectorData\\Normal'
    else:
        targetDir = baseDir + '\\WordVectorData\\' + str(target)

    targetFasttextList = []
    if os.path.isdir(targetDir):
        for file in os.listdir(targetDir):
            fileName, separator, extension = file.rpartition('.')
            # Require a real extension so a file literally named
            # "fasttext" is not mistaken for a model.
            if separator and extension == 'fasttext':
                targetFasttextList.append(fileName)
    else:
        os.makedirs(targetDir)

    lastProcessDate = '0000-00-00 00:00:00'
    if len(targetFasttextList) > 0:
        # File names are timestamps ('%Y#%m#%d&%H#%M#%S'), so the
        # lexicographic maximum is the newest model.
        targetFileName = max(targetFasttextList)
        stampParts = targetFileName.split('&')
        # NOTE(review): lastProcessDate is advanced even when the model
        # below is NOT loaded, so a model trained afterwards only sees data
        # newer than the stale file. Confirm this is intended.
        lastProcessDate = (stampParts[0].replace('#', '-') + ' ' +
                           stampParts[1].replace('#', ':'))
        if (lastUpdateDate is None or targetFileName >
                lastUpdateDate.strftime('%Y#%m#%d&%H#%M#%S')):
            embedding_model = FastText.load(targetDir + '\\' + targetFileName +
                                            '.fasttext')
            # Trailing newline keeps this apart from the next status text.
            outPut = 'There is existing similarity data\n'

    main.ShowTitle(title, outPut + "Reading data")
    if target is None:
        sqlResult = DataBaseManager.DoSQL("""
        SELECT  Article
        FROM    article_dic
        WHERE   Date > '""" + lastProcessDate + """' AND Article != ''
        """)
    else:
        # NOTE(review): unlike the date lookup above, this query does not
        # filter on Product_ID, so reviews of every product are read.
        # Confirm this is intended.
        sqlResult = DataBaseManager.DoSQL("""
        SELECT  Review
        FROM    review_dic
        WHERE   Date > '""" + lastProcessDate + """' AND Review != ''
        """)

    # NOTE(review): ``contents`` is only ever appended to here; presumably
    # it is reset elsewhere between runs - verify against the callers.
    for result in sqlResult:
        for data in result:
            contents.append(data.split('#'))

    return ''
コード例 #9
0
        outPut += "{:10}\t".format(dataList[i][0])
        outPut += "{:1.4}\t\t".format(dataList[i][1])
        if i < len(adjectiveList):
            outPut += "{:10}\t".format(adjectiveList[i][0])
            outPut += "{:1.4}".format(adjectiveList[i][1])
        outPut += '\n'

    return outPut


def Proceed(Mode, title='', outPut=''):
    """Run the selection workflow for *Mode* and return its status text.

    Args:
        Mode: Forwarded to ``SelectProduct``.
        title: Title text forwarded to ``SelectProduct``.
        outPut: Status text forwarded to ``SelectProduct``.

    Returns:
        Whatever ``SelectProduct`` returns.
    """
    # SelectProduct declares its parameters as (Mode, outPut, title); pass
    # them by keyword so title and outPut are not swapped.
    return SelectProduct(Mode, outPut=outPut, title=title)


if __name__ == '__main__':
    # Console entry point: show the top-level menu until the user quits.
    baseDir = os.getcwd()

    # Menu key -> mode handed to Proceed().
    menuModes = {'1': 'Product', '2': 'Normal'}

    outPut = ''
    while True:
        main.ShowTitle(outPut)
        print('1. Product\n2. Normal word')
        inputValue = input('=> ')

        if inputValue == 'q':
            exit()

        if inputValue in menuModes:
            outPut = Proceed(menuModes[inputValue])
        else:
            outPut = 'Please enter correct value or charactor\n'
コード例 #10
0
def ManageProduct(title=None, outPut=None):
    """Interactive console flow for adding, modifying or deleting products.

    The user searches for a product name, then either adds it as a new
    product or picks an existing match to modify or delete. For add/modify
    the flow asks for the name, the carrier, the category and the main/sub
    descriptions before calling ``ProductDictionaryAppend`` or
    ``ProductDictionaryModify``.

    Args:
        title: Title text for the screens (defaults to ``''``).
        outPut: Status text shown on the first screen (defaults to ``''``).

    Returns:
        ``''`` when the user backs out.
    """
    if title is None:
        title = ''
    if outPut is None:
        outPut = ''

    GetProductDic()

    while True:
        main.ShowTitle(title, outPut)

        productName = input('Enter product name (%q to back): ')

        if productName == '%q':
            return ''

        while True:
            productList = GetProductName(productName).get('product_Name')
            main.ShowTitle('Data for ' + productName + '\n' + outPut, title)
            number = 1
            for product in productList:
                print(str(number) + '. Modify ' + product)
                number += 1
            print('')
            print('a. Add new product (' + productName + ')')
            print('r. Re-enter name')
            print('b. Back')
            inputValue = input('=> ')

            targetIndex = -1
            try:
                targetIndex = int(inputValue)
            except ValueError:
                if inputValue == 'r':
                    outPut = ''
                    break
                elif inputValue == 'b':
                    return ''
                elif inputValue == 'a':
                    outPut = ''
                    outPutWork = 'Add new product (' + productName + ')'
                else:
                    outPut = 'Please enter correct number or charactor'
                    continue
            else:
                # Only the numbers that were actually listed are valid; this
                # also keeps user input from colliding with the -1 sentinel.
                if not 1 <= targetIndex <= len(productList):
                    outPut = 'Please enter correct number or charactor'
                    continue

            if targetIndex != -1:
                productPureName = productList[targetIndex - 1]
                deleted = False
                while True:
                    main.ShowTitle(title, outPut)
                    print('1. Modify\n2. Delete')
                    modeInput = input('=> ')

                    if modeInput == '1':
                        outPut = ''
                        outPutWork = 'Modify ' + productPureName
                        break
                    elif modeInput == '2':
                        ProductDictionaryRemove(productPureName)
                        deleted = True
                        break

                if deleted:
                    # Nothing is left to modify: go back to the product list
                    # instead of falling through into the modify prompts.
                    outPut = ''
                    continue

            outPutState = ''
            proceed = True
            name = ''
            propertyList = ['', []]
            DiscriptionList = []
            if inputValue != 'a':
                outPutState = '/ Name'
                main.ShowTitle(outPutWork + outPutState + '\n' + outPut, title)
                nameInputValue = input(
                    'Enter product name (%q to cancel add / %s to skip):')
                if nameInputValue == '%q':
                    # Assignment, not comparison: cancelling must stop here.
                    proceed = False
                    outPut = ''
                elif nameInputValue == '%s':
                    name = ''
                    outPut = ''
                else:
                    name = nameInputValue
                    outPut = ''
            else:
                name = productName
                outPut = ''

            if proceed == False:
                continue

            for i in range(0, 2):
                stateString = [' / Carrier', ' / Category']
                targetString = ['carrier', 'category']
                if i == 0:
                    dataList = GetProductCarrierList(0)
                else:
                    dataList = GetProductCategoryList()
                while True:
                    main.ShowTitle(outPutWork + stateString[i] + '\n' + outPut,
                                   title)
                    for ID, propertyName in dataList.items():
                        print(str(ID) + '. ', end='')
                        print(propertyName)
                    print('')
                    if inputValue != 'a':
                        print('s. Skip')
                    if i == 1:
                        print('a. Add new ' + targetString[i])
                    print('b. Cancel add')
                    propertyInputValue = input('=> ')

                    if propertyInputValue == 'b':
                        proceed = False
                        outPut = ''
                        break
                    elif propertyInputValue == 's':
                        if inputValue != 'a':
                            propertyList[i] = ''
                            outPut = ''
                            break
                        else:
                            outPut = 'Please input without %'
                            continue
                    elif propertyInputValue == 'a':
                        if i == 0:
                            outPut = 'Please enter correct number or charactor'
                            continue
                        main.ShowTitle(
                            outPutWork + outPutState + '\n' + outPut, title)
                        print('Enter' + targetString[i] +
                              '!NO SPACE ENTER! (%q to cancel add',
                              end='')
                        if inputValue != 'a':
                            print(' / %s to skip', end='')
                        propertyInputValue = input('): ')

                        if propertyInputValue == '%q':
                            outPut = ''
                            proceed = False
                            break
                        elif propertyInputValue == '%s':
                            if inputValue != 'a':
                                outPut = ''
                                propertyList[i] = ''
                                break
                            else:
                                outPut = 'Please input without %'
                                continue

                        propertyList[i] = propertyInputValue.replace(' ', '')
                        # The new value is accepted, so leave the prompt
                        # loop just like the numeric selection below does.
                        outPut = ''
                        break
                    else:
                        # NOTE(review): the membership test uses the raw
                        # input string while the lookup below uses
                        # int(input) - 1 as the key; confirm the key type
                        # of dataList and the intended offset.
                        try:
                            if propertyInputValue in dataList.keys():
                                propertyIndex = int(propertyInputValue)
                            else:
                                outPut = 'Please enter correct number or charactor'
                                continue
                        except (TypeError, ValueError):
                            outPut = 'Please enter correct number or charactor'
                            continue

                        propertyList[i] = dataList[propertyIndex - 1]
                        outPut = ''
                        break

                if proceed == False:
                    # Cancelled: do not go on to ask for the next property.
                    break

            if proceed == False:
                continue

            for i in range(0, 2):
                stateString = [' / Main discription', ' / Sub discription']
                targetString = ['main discription', 'sub discription']
                newList = []
                while True:
                    listString = ' data: ' + ', '.join(newList)
                    if inputValue != 'a':
                        # NOTE(review): productDic is indexed with
                        # targetIndex here but productList with
                        # targetIndex - 1 above; verify the offset.
                        listString += '(prev data: ' + ', '.join(
                            productDic[targetIndex][i + 3]) + ')'
                    main.ShowTitle(
                        outPutWork + stateString[i] + listString + '\n' +
                        outPut, title)
                    print(
                        'Enter ' + targetString[i] +
                        ' !NO SPACE ENTER! (%q to cancel add / %f to finish add',
                        end='')
                    if inputValue != 'a':
                        print(' / %s to skip', end='')
                    discriptionInputValue = input('): ')

                    if discriptionInputValue == '%q':
                        # Assignment, not comparison: cancelling must stop.
                        proceed = False
                        outPut = ''
                        break
                    elif discriptionInputValue == '%f':
                        outPut = ''
                        break
                    elif discriptionInputValue == '%s':
                        if inputValue != 'a':
                            # None marks "skipped" for this description.
                            newList = None
                            outPut = ''
                            break
                        else:
                            outPut = 'Please input without %'
                            continue

                    newList.append(discriptionInputValue)
                    outPut = ''

                if proceed == False:
                    break

                DiscriptionList.append(newList)

            if proceed:
                if inputValue == 'a':
                    ProductDictionaryAppend(name, propertyList[0],
                                            propertyList[1],
                                            DiscriptionList[0],
                                            DiscriptionList[1])
                else:
                    ProductDictionaryModify(targetIndex, name, propertyList[0],
                                            propertyList[1],
                                            DiscriptionList[0],
                                            DiscriptionList[1])
コード例 #11
0
def _FormatReviewCounters(skippedCount, noProductCount):
    """Return the ' (skipped: N / not product: M)' suffix, or '' if both are 0."""
    parts = []
    if skippedCount > 0:
        parts.append('skipped: ' + str(skippedCount))
    if noProductCount > 0:
        parts.append('not product: ' + str(noProductCount))
    if not parts:
        return ''
    return ' (' + ' / '.join(parts) + ')'


def Dividing(reviewData, fileName, title=None, outPut=None):
    """Queue and execute the dictionary queries for one file of reviews.

    Every entry of ``reviewData`` is split on ','. The first field, prefixed
    with ``fileName + '-'``, becomes the review number; the second field is
    the review title; all remaining fields are concatenated into the review
    body. Reviews whose number is already present in ``review_dic`` are
    counted as skipped. For every other review, each product name returned
    by ``GetProductName`` gets a ``Count`` UPDATE on ``product_dic``, an
    INSERT into ``review_dic`` and an increment of its per-word counters in
    the module-level ``productSimilarDic``.

    The module-level lists ``insertQuery``, ``updateQuery``,
    ``similarInsertQuery`` and ``similarUpdateQuery`` are reset at the start
    and handed to ``DataBaseManager.DoManyQuery`` at the end.
    NOTE(review): nothing in this function appends to the two ``similar*``
    lists; presumably ``AppendWordDicQuery`` fills them - confirm.

    Args:
        reviewData: Sized iterable of comma separated review lines.
        fileName: Prefix for review numbers; also shown in progress text.
        title: Title forwarded to ``main.ShowTitle`` / ``DoManyQuery``.
            ``None`` is treated as ''.
        outPut: Text shown before the progress line. ``None`` is treated
            as ''.

    Returns:
        A newline-terminated summary string, or ``'No data in <fileName>'``
        (without newline) as soon as a line with fewer than two fields is
        met.
    """
    global similarInsertQuery
    global similarUpdateQuery
    global insertQuery
    global updateQuery

    if title is None:
        title = ''
    if outPut is None:
        outPut = ''

    similarInsertQuery = []
    similarUpdateQuery = []
    insertQuery = []
    updateQuery = []

    completeIndex = 0
    noProductIndex = 0
    skippedIndex = 0

    sqlResult = DataBaseManager.DoSQL("""
    SELECT  Review_ID, Review_Number
    FROM    review_dic
    """)
    # Build the lookup set once: testing against dict.values() would rescan
    # every stored review number for every line of the file.
    completedNumbers = set(dict(sqlResult).values())

    updateTime = 0
    for data in reviewData:
        # The timestamp has one-second resolution, so the progress line is
        # redrawn at most once per second (and always on the first line).
        currentTime = int(datetime.datetime.now().strftime('%Y%m%d%H%M%S'))
        if updateTime < currentTime:
            updateTime = currentTime
            main.ShowTitle(
                title, outPut + 'Building dictionary for ' + fileName + ' (' +
                str(completeIndex + skippedIndex + noProductIndex) + '/' +
                str(len(reviewData)) + ')' +
                _FormatReviewCounters(skippedIndex, noProductIndex))

        splitData = data.split(',')
        if len(splitData) < 2:
            # NOTE(review): returning here drops every query queued so far
            # for this file without executing it - confirm this is intended.
            return 'No data in ' + fileName

        reviewNumber = fileName + '-' + splitData.pop(0)
        reviewTitleString = splitData.pop(0)
        reviewTitleString = reviewTitleString.replace('\n', '')
        reviewTitleString = reviewTitleString.replace(';', ',')
        reviewString = ''.join(splitData)
        reviewString = reviewString.replace('\n', '')
        reviewString = reviewString.replace(';', ',')

        if reviewNumber in completedNumbers:
            skippedIndex += 1
            continue

        # A title of '!e' is ignored without touching any counter.
        if reviewTitleString == '!e':
            continue

        reviewTitleStringList = NLP.DoNLP(reviewTitleString, None, 'Review')
        reviewStringList = NLP.DoNLP(reviewString, None, 'Review')

        resultList = GetProductName(
            ' '.join(reviewTitleStringList),
            ' '.join(reviewStringList)).get('product_Name')

        if len(resultList) > 0:
            resultStringList = DictionaryBuilder.ConvertNormalWord(
                mainStringList=reviewTitleStringList,
                subStringList=reviewStringList,
                mode='Review')
            resultString = '#'.join(resultStringList)

            for name in resultList:
                # NOTE(review): these statements are assembled by string
                # concatenation from file content; a quote character in
                # name/resultString would break the statement. Switch to
                # parameterized queries if DataBaseManager supports them.
                updateQuery.append("""
                UPDATE  product_dic
                SET     Count = Count + 1
                WHERE   Product_Name = '""" + name + """'
                """)
                insertQuery.append("""
                INSERT INTO review_dic (Review_Number, Review, Product_ID)
                VALUES ('""" + reviewNumber + """', '""" + resultString +
                                   """', """ +
                                   str(productDic[name].get('productID')) +
                                   """)
                """)

                for word in resultStringList:
                    # setdefault stays inside the loop so that a product
                    # only gets an entry once it has at least one word.
                    wordDict = productSimilarDic.setdefault(name, {})
                    wordDict[word] = wordDict.get(word, 0) + 1

            completeIndex += 1
        else:
            noProductIndex += 1

    returnString = ("Complete building dictionary for " + fileName +
                    _FormatReviewCounters(skippedIndex, noProductIndex) +
                    '\n')

    AppendWordDicQuery(title=title, outPut=outPut + returnString)
    DataBaseManager.DoManyQuery(insertQuery,
                                title=title,
                                outPut=outPut + returnString,
                                queryType='INSERT')
    DataBaseManager.DoManyQuery(updateQuery,
                                title=title,
                                outPut=outPut + returnString,
                                queryType='UPDATE')
    DataBaseManager.DoManyQuery(similarInsertQuery,
                                'db_capstone_similarity',
                                title=title,
                                outPut=outPut + returnString,
                                queryType='INSERT')
    DataBaseManager.DoManyQuery(similarUpdateQuery,
                                'db_capstone_similarity',
                                title=title,
                                outPut=outPut + returnString,
                                queryType='UPDATE')

    return returnString
コード例 #12
0
def AppendArticleDic(reviewData, fileName, title=None, outPut=None):
    """Queue and execute the INSERTs that add one file of articles.

    Every entry of ``reviewData`` is split on ','. The first field, prefixed
    with ``fileName + '-'``, becomes the article number. The second field is
    kept, followed by ',', and the remaining fields are concatenated after
    it to form the article text. Articles whose number is already stored in
    ``article_dic`` are counted as skipped; articles whose second field is
    the marker '!e' are ignored. Every other article is normalised with
    ``ConvertNormalWord``, queued for insertion and its words are counted;
    the word counts are handed to ``AppendWordDicQuery``.

    The module-level lists ``insertQuery`` and ``updateQuery`` are reset at
    the start and handed to ``DataBaseManager.DoManyQuery`` at the end.
    NOTE(review): nothing in this function appends to ``updateQuery``;
    presumably ``AppendWordDicQuery`` fills it - confirm.

    Args:
        reviewData: Sized iterable of comma separated article lines.
        fileName: Prefix for article numbers; also shown in progress text.
        title: Title forwarded to ``main.ShowTitle`` / ``DoManyQuery``.
            ``None`` is treated as ''.
        outPut: Text shown before the progress line. ``None`` is treated
            as ''.

    Returns:
        A newline-terminated summary string, or ``'No data in <fileName>'``
        (without newline) as soon as a line with fewer than two fields is
        met.
    """
    global insertQuery
    global updateQuery

    if title is None:
        title = ''
    if outPut is None:
        outPut = ''

    insertQuery = []
    updateQuery = []
    stackUnit = DataBaseManager.maximumQueryStactUnit

    completeIndex = 0
    skippedIndex = 0

    articleLastID = DataBaseManager.DoSQL("""
    SELECT `AUTO_INCREMENT`
    FROM  INFORMATION_SCHEMA.TABLES
    WHERE TABLE_SCHEMA = 'db_capstone'
    AND   TABLE_NAME   = 'article_dic';
    """)[0][0]
    # Read the existing (Article_ID, Article_Number) rows in ID windows of
    # stackUnit until the window start passes the table's AUTO_INCREMENT.
    articleNumberList = []
    index = 0
    while True:
        articleNumberList.extend(
            DataBaseManager.DoSQL("""
        SELECT  Article_ID, Article_Number
        FROM    article_dic
        WHERE   Article_ID > """ + str(index) + """ AND Article_ID <= """ +
                                  str(index + stackUnit)))
        index += stackUnit
        if index > articleLastID:
            break
    # Build the lookup set once: testing against dict.values() would rescan
    # every stored article number for every line of the file.
    completedNumbers = set(dict(articleNumberList).values())
    wordDic = {}

    main.ShowTitle(
        title, outPut + 'Building dictionary for ' + fileName + ' (' +
        str(completeIndex) + '/' + str(len(reviewData)) + ')')
    updateTime = int(datetime.datetime.now().strftime('%Y%m%d%H%M%S'))
    for data in reviewData:
        # The timestamp has one-second resolution, so the progress line is
        # redrawn at most once per second.
        currentTime = int(datetime.datetime.now().strftime('%Y%m%d%H%M%S'))
        if updateTime < currentTime:
            updateTime = currentTime
            main.ShowTitle(
                title, outPut + 'Building dictionary for ' + fileName + ' (' +
                str(completeIndex + skippedIndex) + '/' +
                str(len(reviewData)) + ')')

        splitData = data.split(',')
        if len(splitData) < 2:
            # NOTE(review): returning here drops every query queued so far
            # for this file without executing it - confirm this is intended.
            return 'No data in ' + fileName

        reviewNumber = fileName + '-' + splitData.pop(0)
        headField = splitData.pop(0)
        reviewString = headField + ',' + ''.join(splitData)
        reviewString = reviewString.replace('\n', '')
        reviewString = reviewString.replace(';', ',')

        if reviewNumber in completedNumbers:
            skippedIndex += 1
            continue

        # The '!e' marker must be tested on the field itself: reviewString
        # always carries the ',' appended above, so comparing the assembled
        # string against '!e' could never match.
        if headField.replace('\n', '') == '!e':
            continue
        resultStringList = ConvertNormalWord(reviewString, reviewString)

        resultString = '#'.join(resultStringList)
        # NOTE(review): this statement is assembled by string concatenation
        # from file content; a quote character in resultString would break
        # it. Switch to parameterized queries if DataBaseManager supports
        # them.
        insertQuery.append("""
        INSERT INTO article_dic (Article_Number, Article)
        VALUES ('""" + reviewNumber + """', '""" + resultString + """')""")

        for word in resultStringList:
            wordDic[word] = wordDic.get(word, 0) + 1

        completeIndex += 1

    AppendWordDicQuery(wordDic)

    returnString = "Complete building dictionary for " + fileName
    if skippedIndex > 0:
        returnString += ' (skipped ' + str(skippedIndex) + ' of ' + str(
            len(reviewData)) + ' review)'
    returnString += '\n'

    DataBaseManager.DoManyQuery(insertQuery,
                                title=title,
                                outPut=outPut + returnString,
                                queryType='INSERT')
    DataBaseManager.DoManyQuery(updateQuery,
                                title=title,
                                outPut=outPut + returnString,
                                queryType='UPDATE')

    return returnString