Ejemplo n.º 1
0
 def __init__(self):
     """Set up empty match state, create the helper objects and call load().

     Takes no arguments. The ban-list file and the result folder are
     hard-coded absolute Windows paths.
     """
     # Sentence list and the three match-result lists start out empty.
     self.sentences = []
     self.matchesCrafted = []
     self.matchesRegex = []
     self.matchesNeural = []
     self.title = ""
     self.textLoaded = False
     # NOTE(review): starts as False; presumably replaced by a model object
     # elsewhere in the class -- confirm.
     self.Neural = False
     # Helper objects. SplitText receives this instance in its constructor
     # and Chunking receives it through addTT().
     self.st = SplitText(self)
     self.gt = GetText()
     self.ch = Chunking()
     self.ch.addTT(self)
     # Names used as dictionary keys / column labels for neural results
     # (assumption based on the attribute name -- TODO confirm).
     self.nameListNeural = [
         "Value", "Pattern Grammar", "Pattern Sentence", "Article Grammar",
         "Article Sentence", "Compared"
     ]
     # Hard-coded, machine-specific locations.
     self.notFile = "C:\\Users\\trace\\projects\\python\\masters\\informationPull\\pdata\\notlist.txt"
     self.resultFolder = 'C:\\Users\\trace\\projects\\python\\masters\\informationPull\\pdata\\results'
     self.notList = []
     self.patterns = False
     # load() is defined outside this excerpt; it is invoked last, after all
     # attributes above have been assigned.
     self.load()
Ejemplo n.º 2
0
def main(dramaname, autor, act):
    """Run the program for one drama.

    Any argument that is falsy is requested interactively with
    ``click.prompt``.

    Args:
        dramaname: Name of the drama.
        autor: Surname of the author.
        act: Act to analyse. It is converted with ``int()``; per the prompt
            text, ``0`` selects the whole drama.
    """
    data_source = GetData()
    text_tools = GetText()
    sentiment = GetSentiment()
    painter = GraphMalen()

    dramaname = dramaname or click.prompt('Gib den Namen eines Dramas ein')
    autor = autor or click.prompt('Gib den Nachnamen des Autors ein')

    draname = data_source.eingabe_drama(dramaname, autor)
    tei = data_source.get_tei(draname)
    csv_drama = data_source.get_csv(draname)
    replik = text_tools.create_replik_dict(csv_drama)
    soup = text_tools.stir_the_soup(tei)

    if not act:
        # Tell the user how many acts exist before asking which one to use.
        act_count = text_tools.how_many_acts(soup)
        print("Das ausgewählte Drama hat {} Akte".format(act_count))
        act = click.prompt(
            'Gib den Akt ein, den du analysieren willst (falls du das Netzwerk für das gesamte Drama haben möchtest, wähle 0)'
        )

    # User input is 1-based; an input of 0 becomes -1, meaning "whole drama".
    which_act = int(act) - 1
    if which_act == -1:
        selection = text_tools.drama_total(soup)
    else:
        selection = text_tools.drama_act(soup, which_act)
    replik = text_tools.which_type(selection, replik)

    replik = sentiment.get_sentis(replik)
    all_in_all = sentiment.average_senti(replik)

    nodes = painter.get_nodes(csv_drama)
    edges = painter.get_edges(all_in_all)
    labels_edges = painter.get_labels(edges)
    graph = painter.graph(edges, nodes)
    painter.malen(graph, labels_edges, draname, which_act + 1)

    os.system('clear')
    menu()
Ejemplo n.º 3
0

def screenShot():
    """Wait for the screenshot hotkeys, then save the clipboard image.

    Prints the prompts, waits for F1 and then for Ctrl+b, grabs the clipboard
    content and writes it to ``Picture.png``. When the clipboard does not
    hold an image, nothing is saved and a retry hint is printed instead of
    raising ``AttributeError``.
    """
    print('请按F1开始截图')
    if keyboard.wait(hotkey='f1') is not None:
        return
    print('复制剪切板的图片,请按Ctrl+b,不复制继续截图')
    if keyboard.wait(hotkey='Ctrl+b') is not None:
        return
    # Short pause so that the previous screenshot is not picked up.
    sleep(0.02)
    # Copy the image that is on the clipboard.
    im = ImageGrab.grabclipboard()
    # grabclipboard() may return None or a list of file names rather than an
    # image; neither of those can be saved.
    if not hasattr(im, 'save'):
        print('请重新截图')
        return
    im.save('Picture.png')


if __name__ == '__main__':
    baiduapi = BaiDuAPI('password.ini')
    for _ in range(sys.maxsize):
        screenShot()
        texts = baiduapi.picture2Text('Picture.png')
        print(texts)
        # Original note: "clipboard paste" -- presumably places the text on
        # the clipboard; confirm against GetText.
        GetText.setText(texts)
        sleep(0.02)
        GetText.getText()
        print('退出请按Ctrl+x')
        # NOTE(review): keyboard.wait() is documented to block until the
        # hotkey is pressed, so this branch is taken on the first pass and
        # the loop never repeats -- confirm whether that is intended.
        if keyboard.wait(hotkey='Ctrl+x') is None:
            name = input('请输入保存图像识别文字文件名:')
            # Context manager guarantees the file is closed even if the write
            # fails; explicit UTF-8 avoids encode errors for recognised text
            # that the platform default encoding cannot represent.
            with open(name + '.txt', 'w', encoding='utf-8') as f:
                f.write(texts)
            break
Ejemplo n.º 4
0
# Fetch the image that is on the clipboard and save it locally.
def screenShot():
    """Wait for the capture hotkeys, then save the clipboard image.

    Waits for Ctrl+Alt+A and then Enter, grabs the clipboard content and
    writes it to ``imageGrab.png`` when it is a PIL image; otherwise a retry
    hint is printed.
    """
    # QQ screenshot: Ctrl+Alt+A starts the capture, Enter completes it.
    if keyboard.wait(hotkey='ctrl+alt+a') is not None:
        print('请按Ctrl+A+Alt来截图识别文字')
        return
    if keyboard.wait(hotkey='enter') is not None:
        return
    sleep(0.01)
    # Read the image content of the clipboard.
    im = ImageGrab.grabclipboard()
    # Only save when the clipboard content really is an image.
    if isinstance(im, Image.Image):
        im.save('imageGrab.png')
    else:
        print('重新截图')

if __name__ == '__main__':
    config_path = r'C:\Users\Administrator\.spyder-py3\screenshot\password.ini'
    image_path = r'C:\Users\Administrator\.spyder-py3\screenshot\imageGrab.png'
    baiduapi = BaiDuAPI(config_path)

    # Looping up to sys.maxsize keeps the program taking screenshots instead
    # of ending after the first one.
    for _ in range(sys.maxsize):
        screenShot()
        text = baiduapi.picture2Text(image_path)
        print(text)
        GetText.setText(text)
        GetText.getText()
Ejemplo n.º 5
0
        # 同上,这里是截图开始
        if keyboard.wait(hotkey='Enter') is None:
            # 因为获取剪切板的图片太快了,所以要停顿一小段时间
            sleep(0.01)
            # 复制剪切板中的图片
            im = ImageGrab.grabclipboard()
            sleep(0.01)
            im.save('Picture.png')


if __name__ == "__main__":
    baidu_api = BaiDuAPI()

    # Loop so that many screenshots can be taken instead of the program
    # ending after a single one.
    for i in range(sys.maxsize):
        if i:
            print("您已成功截图,请截取下一张图!\n")
        else:
            # The usage notes are shown on the first pass only.
            for line in (
                    "注意事项:请使用QQ的默认截图按键(Ctrl+alt+a),截图完成后请敲击回车键完成截图!",
                    "截图完成后,你截取图片中的文字自动复制,您可自由粘贴!",
                    "现在开始截取你需要的图片吧!\n",
            ):
                print(line)
        screenShot()

        res = baidu_api.pictureText()
        print('截取的内容:' + res)

        GetText.setText(res)
        sleep(0.01)
        GetText.getText()

Ejemplo n.º 6
0
class TextObject:
    """Keep the sentences of one article and match them against patterns.

    The actual work is delegated to helper objects created in ``__init__``:
    ``SplitText`` (``self.st``), ``GetText`` (``self.gt``), ``Chunking``
    (``self.ch``) and, created on first use, ``NeuralClass``
    (``self.Neural``).
    """

    # Default locations used when the constructor gets no explicit paths.
    DEFAULT_NOT_FILE = "C:\\Users\\trace\\projects\\python\\masters\\informationPull\\pdata\\notlist.txt"
    DEFAULT_RESULT_FOLDER = 'C:\\Users\\trace\\projects\\python\\masters\\informationPull\\pdata\\results'

    def __init__(self, notFile=None, resultFolder=None):
        """Create the helpers and load the ban list and the patterns.

        Args:
            notFile: Path of the ban-word file; defaults to
                ``DEFAULT_NOT_FILE``.
            resultFolder: Folder that receives the result files; defaults to
                ``DEFAULT_RESULT_FOLDER``.
        """
        self.sentences = []
        self.matchesCrafted = []
        self.matchesRegex = []
        self.matchesNeural = []
        self.title = ""
        self.textLoaded = False
        # Replaced by a NeuralClass instance on the first neural match run.
        self.Neural = False
        self.st = SplitText(self)
        self.gt = GetText()
        self.ch = Chunking()
        self.ch.addTT(self)
        # Keys of the result dictionaries built by findNeuralMatches().
        self.nameListNeural = [
            "Value", "Pattern Grammar", "Pattern Sentence", "Article Grammar",
            "Article Sentence", "Compared"
        ]
        self.notFile = self.DEFAULT_NOT_FILE if notFile is None else notFile
        self.resultFolder = (self.DEFAULT_RESULT_FOLDER
                             if resultFolder is None else resultFolder)
        self.notList = []
        self.patterns = False
        self.load()

    def loadPatterns(self):
        """Fetch the pattern object from the SplitText helper."""
        self.patterns = self.st.getPatternObject()

    def load(self):
        """Load the troublesome-word file ("not" etc.) and the patterns."""
        with open(self.notFile, "r", encoding="utf8") as f:
            for line in f:
                word = line.strip()
                # A blank line would yield "", which is a substring of every
                # sentence and would flag everything as troublesome.
                if word:
                    self.notList.append(word)
        self.loadPatterns()

    def reset(self):
        """Clear the stored sentences and all match lists."""
        self.sentences = []
        self.matchesCrafted = []
        self.matchesRegex = []
        self.matchesNeural = []

    def returnPOSList(self, sentence):
        """Return ``self.ch.returnPOSList(sentence)``."""
        return self.ch.returnPOSList(sentence)

    def getGraph(self, sentence):
        """Return ``self.ch.displacyService(sentence)``."""
        return self.ch.displacyService(sentence)

    def sentenceTokenDisplay(self, sentence):
        """Return ``self.st.sentenceTokenDisplay(sentence)``."""
        return self.st.sentenceTokenDisplay(sentence)

    def sentenceTokenDisplayList(self, sentenceList):
        """Apply ``self.st.sentenceTokenDisplay`` to every sentence."""
        return [self.st.sentenceTokenDisplay(x) for x in sentenceList]

    def getArticle(self, name):
        """Given an article title, return ``[title, sentences]`` or False.

        The sentences are also stored on the instance via saveSentences().
        """
        article = self.gt.getArticle(name)
        if not article:
            return False
        title = article[0]
        text = article[1]
        sentences = self.st.textToSentenceList(text)
        self.saveSentences(title, sentences)
        return [title, sentences]

    def splitSentencesInSectionList(self, articleSections):
        """Replace each section's "Text" with its list of sentences.

        The section dictionaries are modified in place and the same list is
        returned.
        """
        for section in articleSections:
            section["Text"] = list(
                self.st.textToSentenceList(section["Text"]))
        return articleSections

    def getArticleSectionList(self, name):
        """Return ``[title, sections]`` for an article title, or False.

        Each section's "Text" is split into sentences and the result is
        stored on the instance via saveSentences().
        """
        articleSections = self.gt.getArticleSectionList(name)
        if not articleSections:
            return False
        title = articleSections[0]
        textDict = articleSections[1]
        sentences = self.splitSentencesInSectionList(textDict)
        self.saveSentences(title, sentences)
        return [title, sentences]

    def isTroubleSome(self, sentence):
        """Return True if any ban-list entry occurs in the lowercased sentence."""
        checkSentence = sentence.lower()
        return any(x in checkSentence for x in self.notList)

    def removeTroublesomeSentencesRegex(self, matchList, nameToCheck):
        """Drop troublesome entries from every inner list of ``matchList``.

        Inner lists that are empty or whose first element equals False are
        left untouched. The inner lists are filtered in place and
        ``matchList`` itself is returned.
        """
        for x in matchList:
            if (len(x) > 0) and (x[0] != False):
                # Rebuild instead of calling remove() during iteration, which
                # skips the element that follows each removed one.
                x[:] = [y for y in x if not self.isTroubleSome(y[nameToCheck])]
        return matchList

    def removeTroublesomeSentencesNeural(self, matchList, nameToCheck):
        """Given a list of dicts, remove entries whose ``nameToCheck`` value
        contains a ban-list word.

        The list is filtered in place and returned.
        """
        # Slice assignment keeps the caller's list object while avoiding
        # removal during iteration (which skips consecutive matches).
        matchList[:] = [
            x for x in matchList if not self.isTroubleSome(x[nameToCheck])
        ]
        return matchList

    def findRegexMatches(self):
        """Run the regex comparison and write the first hit of each result.

        Returns the unmodified result of ``self.ch.compareRegexAll``.
        """
        print('Finding Regex Matches')
        articleList = self.getTextOnly(self.sentences)
        patternList = self.getPatterns()

        matches = self.ch.compareRegexAll(articleList, patternList)

        writeList = [x[0] for x in matches if x]
        self.writeMatched(writeList, '-regex')
        return matches

    def getTextOnly(self, textDict):
        """Flatten the "Text" lists of all sections into one list."""
        return [y for x in textDict for y in x['Text']]

    def textToPOSTokens(self, text):
        """Placeholder; always returns 0."""
        return 0

    def constructReducedSentence(self, index, articleSentence, fullSentence,
                                 shortSentence):
        """Parse the space-separated ``index`` string and delegate to Chunking.

        Tokens that are not integers are skipped after printing the
        arguments for diagnosis.
        """
        indexs = []
        for x in index.split(" "):
            try:
                indexs.append(int(x))
            except ValueError:
                print(index, articleSentence, fullSentence, shortSentence)
        return self.ch.constructReducedSentence(indexs, articleSentence,
                                                fullSentence, shortSentence)

    def getColumn(self, lst, col):
        """Return element ``col`` of every row in ``lst``."""
        return [val[col] for val in lst]

    def getPatterns(self):
        """Return the loaded patterns."""
        return self.patterns

    def findExactStructureMatches(self):
        """Return a result dict for every sentence/pattern exact match.

        Returns False when no patterns are loaded.
        """
        text = self.getTextOnly(self.sentences)
        # NOTE(review): the result of this call is unused; the call is kept
        # in case sentenceTokenDisplay has side effects -- confirm and drop.
        self.sentenceTokenDisplayList(text)
        patterns = self.getPatterns()
        if not patterns:
            return False

        matchList = []
        for x in text:
            for y in patterns:
                if self.ch.exactMatch(x, y['ShortSentence']):
                    reducedSen = self.constructReducedSentence(
                        y['ShortIndex'], x, y['FullSentence'],
                        y['ShortSentence'])
                    matchList.append({
                        'Article Sentence': x,
                        'Pattern': y['ShortSentence'],
                        'Match': 'Exact',
                        'Reduced Sentence': reducedSen
                    })
        return matchList

    def treeMatchFunct(self, x, patterns, matchList):
        """Disabled stub; always returns False."""
        return False

    def findTreeMatches(self):
        """Run the tree comparison, write all matches, then filter them.

        The '-tree' file receives the unfiltered matches; the returned list
        has entries with troublesome article sentences removed.
        """
        matches = self.ch.compareTreeAll(self.getTextOnly(self.sentences),
                                         self.getPatterns())
        self.writeMatched(matches, '-tree')
        return self.removeTroublesomeSentencesNeural(matches,
                                                     'Article Sentence')

    def findNeuralMatches(self, resultType, lengthType):
        """Compare the article's POS sequences with the pattern POS sequences.

        Args:
            resultType: Value stored under the "Compared" key of each result.
            lengthType: ``"short"`` selects the short pattern fields; any
                other value selects the full ones.

        Returns:
            Up to 100 result dicts (values converted to strings), sorted by
            "Value" descending, without troublesome sentences and with one
            entry per article sentence; ``None`` when no text is loaded.
        """
        if not self.textLoaded:
            return None
        if not self.Neural:
            # The model is created and loaded on first use only.
            self.Neural = NeuralClass()
            self.Neural.loadModel()
        text = self.getTextOnly(self.sentences)

        # Turns [('word0','POS0'),('word1','POS1')] into 'POS0 POS1 '
        # (each tag is followed by a space).
        grammarText = [
            "".join(word[1] + " " for word in senList)
            for senList in self.st.listToToken(text)
        ]

        patterns = self.getPatterns()
        if lengthType == "short":
            sentenceKey, posKey = 'ShortSentence', 'ShortPOS'
        else:
            sentenceKey, posKey = 'FullSentence', 'FullPOS'
        matchSentence = [x[sentenceKey] for x in patterns]
        matchPOS = [x[posKey] for x in patterns]

        # Switches for the comparison kinds; only POS is enabled.
        getNeuText = False
        getNeuPOS = True
        getNeuDep = False
        getNeuHead = False
        results = {"Text": "F", "POS": "F", "Dependency": "F", "Head": "F"}
        if getNeuText:
            results["Text"] = self.Neural.runAndPlotPatterns(
                matchSentence, text)
        if getNeuPOS:
            results["POS"] = self.Neural.runAndPlotPatterns(
                matchPOS, grammarText)
        if getNeuDep:
            results["Dependency"] = self.Neural.runAndPlotPatterns(
                matchPOS, grammarText)
        if getNeuHead:
            results["Head"] = self.Neural.runAndPlotPatterns(
                matchPOS, grammarText)

        names = self.nameListNeural
        matchList = []
        for idx, row in enumerate(results["POS"]):
            for idy, value in enumerate(row):
                matchList.append({
                    names[0]: value,
                    names[1]: matchPOS[idy],
                    names[2]: matchSentence[idy],
                    names[3]: grammarText[idx],
                    names[4]: text[idx],
                    names[5]: resultType,
                })
        rt = self.convertListString(self.viewMatches(matchList))

        rems = self.removeTroublesomeSentencesNeural(rt, names[4])[:100]

        # Keep the first (highest-valued) entry per article sentence; the
        # values are strings at this point, so a set can track them.
        seen = set()
        noDupes = []
        for x in rems:
            articleSentence = x[names[4]]
            if articleSentence not in seen:
                seen.add(articleSentence)
                noDupes.append(x)
        return noDupes

    def convertListString(self, data):
        """Convert every value of every dict in ``data`` to ``str`` in place."""
        for x in data:
            for key, value in x.items():
                x[key] = str(value)
        return data

    def writeMatched(self, data, typed):
        """Append ``data`` to ``<resultFolder>\\<title><typed>.txt``.

        Every truthy entry is written as ``key:value`` lines; each entry,
        truthy or not, is terminated by an ``END:`` line. A single trailing
        space of the title is dropped for the file name.
        """
        titleEnd = self.title
        if titleEnd.endswith(' '):
            titleEnd = titleEnd[:-1]
        path = '{0}\\{1}{2}.txt'.format(self.resultFolder, titleEnd, typed)
        # The context manager closes (and flushes) the file on every path.
        with open(path, "a", encoding="utf-8") as f:
            for x in data:
                if x:
                    for key in x:
                        f.write('{0}:{1}\n'.format(key, x[key]))
                f.write('END:\n')

    def viewMatches(self, neuralMatchList):
        """Sort so higher values are first, write them and return them."""
        sortedArr = sorted(neuralMatchList,
                           key=lambda i: i[self.nameListNeural[0]],
                           reverse=True)
        self.writeMatched(sortedArr, '-neural')
        return sortedArr

    def saveSentences(self, title, sentences):
        """Store title and sentences and mark the text as loaded."""
        self.textLoaded = True
        self.title = title
        self.sentences = sentences
Ejemplo n.º 7
0
import time
from PIL import ImageGrab, Image
import sys
from baidu import BaiDuapi
from getText import GetText


def screenShot():
    """Wait for the hotkeys, then save the clipboard image as ``图片.jpg``.

    Waits for Shift+A and then Esc, grabs the clipboard content and saves it
    when it is a PIL image; otherwise a retry hint is printed.
    """
    # Listen for the key presses.
    if keyboard.wait(hotkey='shift+a') is not None:
        return
    time.sleep(0.01)
    if keyboard.wait(hotkey='esc') is not None:
        return
    im = ImageGrab.grabclipboard()
    if isinstance(im, Image.Image):
        im.save('图片.jpg')
    else:
        print('请重新截图')


if __name__ == "__main__":
    api = BaiDuapi('api.ini')

    # One pass per screenshot; the loop bound is sys.maxsize.
    for _ in range(sys.maxsize):
        screenShot()

        text = api.pictureText('图片.jpg')
        print(text)
        GetText.setText(text)
        GetText.getText()