Exemple #1
0
def notes():
    """Build study notes for the article at the submitted URL and render them.

    Reads the ``link`` form field, downloads and parses the article, and
    writes a Word document containing:

    * a summary of the article text (``summarize`` at ratio 0.25, one
      paragraph per sentence), and
    * a numbered vocabulary list made from the ten longest extracted keyword
      tokens (lemmatized and de-duplicated), each with its dictionary
      senses and up to ten related words from Datamuse.

    The document is saved as ``<letters of the title>.docx`` in the current
    working directory, converted to PDF through the Cloudmersive API into
    ``static/``, and recorded as a ``Note`` owned by ``current_user``.

    Returns:
        The rendered ``notes.html`` template, given the static URL of the PDF
        as ``myFile``.
    """
    import os

    import cloudmersive_convert_api_client
    from cloudmersive_convert_api_client.rest import ApiException
    from datamuse import datamuse
    from docx import Document
    from newspaper import Article
    from nltk.stem import WordNetLemmatizer
    from nltk.tokenize import sent_tokenize
    from PyDictionary import PyDictionary
    from summa import keywords
    from summa.summarizer import summarize

    # NOTE(review): the URL comes straight from the form and is fetched
    # server-side; consider validating scheme/host before downloading.
    url = str(request.form['link'])
    article = Article(url)
    article.download()
    article.parse()
    text = article.text
    title = article.title

    # --- Vocabulary selection -------------------------------------------
    # The keyword output is split on newlines and then on single spaces.
    # Empty tokens (e.g. from an empty keyword result) are dropped so they
    # are never looked up.
    tokens = [
        token
        for line in keywords.keywords(text).split('\n')
        for token in line.split(' ')
        if token
    ]
    # Ascending stable sort followed by reverse (rather than reverse=True)
    # keeps the same tie order as the previous implementation.
    tokens.sort(key=len)
    tokens.reverse()
    longest = tokens[:10]  # slicing never raises, even with fewer than 10

    lemmatizer = WordNetLemmatizer()
    # dict.fromkeys de-duplicates while keeping a deterministic order
    # (a plain set would reorder the words from run to run).
    vocab = list(dict.fromkeys(lemmatizer.lemmatize(word) for word in longest))

    dictionary = PyDictionary()
    definitions = [dictionary.meaning(word) for word in vocab]

    summary_sentences = sent_tokenize(summarize(text, ratio=0.25))

    # --- Document assembly ----------------------------------------------
    doc = Document()
    doc.add_heading('Notes for ' + title, 0)
    for sentence in summary_sentences:
        doc.add_paragraph(sentence)

    doc.add_heading('Vocab Words from ' + title, 0)
    synonym_api = datamuse.Datamuse()  # one client for every lookup
    for number, (word, meaning) in enumerate(zip(vocab, definitions), start=1):
        entry = doc.add_paragraph(str(number) + ') ')
        entry.add_run(word).bold = True
        entry.add_run(': ')
        # meaning() yields None when no definition is found; render an empty
        # definition instead of failing on None.values().
        senses = list(meaning.values()) if meaning else []
        entry.add_run(str(senses).replace('[', '').replace(']', ''))

        related = doc.add_paragraph('')
        related.add_run('Synonyms for ')
        related.add_run(word.upper() + ': ').bold = True
        matches = synonym_api.words(ml=word, max=10)
        synonyms = [match['word'] for match in matches if 'word' in match]
        related.add_run(', '.join(synonyms)).italic = True

    # --- Persist as DOCX, then convert to PDF ---------------------------
    # Only ASCII letters of the title survive into the file name.
    allowed = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')
    stem = ''.join(ch for ch in title if ch in allowed)
    docx_name = stem + '.docx'
    pdf_name = stem + '.pdf'
    doc.save(docx_name)

    configuration = cloudmersive_convert_api_client.Configuration()
    # NOTE(review): this key is committed to source control. Prefer setting
    # CLOUDMERSIVE_API_KEY in the environment and rotating the embedded key;
    # the literal remains only as a backward-compatible fallback.
    configuration.api_key['Apikey'] = os.environ.get(
        'CLOUDMERSIVE_API_KEY', 'f0c513bc-8c00-4491-830e-3e83b015feb6')
    api_instance = cloudmersive_convert_api_client.ConvertDocumentApi(
        cloudmersive_convert_api_client.ApiClient(configuration))
    try:
        # Convert Word DOCX Document to PDF
        pdf_bytes = api_instance.convert_document_docx_to_pdf(docx_name)
    except ApiException as e:
        print(
            "Exception when calling ConvertDocumentApi->convert_document_docx_to_pdf: %s\n"
            % e)
    else:
        # Context manager closes the handle even if the write fails.
        with open('static/' + pdf_name, 'wb') as pdf_file:
            pdf_file.write(pdf_bytes)

    # NOTE(review): the note is recorded and linked even when the conversion
    # above failed and no PDF was written; kept as-is to preserve behavior.
    note = Note(noteFile=str(pdf_name), creator=current_user)
    db.session.add(note)
    db.session.commit()
    pdf_url = url_for('.static', filename=pdf_name)
    return render_template('notes.html', myFile=pdf_url)