Beispiel #1
def upload():
    """Store the uploaded files and render a page linking to them.

    Builds a tagger from the pickled weights dictionary, saves every
    uploaded file whose name passes ``allowed_file`` into the upload
    folder, and renders ``upload.html`` with the stored file names and
    the ``tags`` list.
    """
    import os

    # tagger classes and functions
    with open('data/dict.pkl', 'rb') as weights_file:  # or your own dictionary
        weights = pickle.load(weights_file)
    myreader = tagger.Reader()  # or your own reader class
    mystemmer = tagger.Stemmer()  # or your own stemmer class
    myrater = tagger.Rater(weights)  # or your own... (you got the idea)
    mytagger = Tagger(myreader, mystemmer, myrater)

    # Get the uploaded files
    uploaded_files = request.files.getlist("file[]")
    filenames = []
    # NOTE(review): the statement that filled ``tags`` (presumably by
    # calling ``mytagger`` on each upload) is missing from this snippet, so
    # the template currently receives an empty list -- restore it from the
    # original project.
    tags = []
    for uploaded in uploaded_files:
        # Check if the file is one of the allowed types/extensions
        if uploaded and allowed_file(uploaded.filename):
            # Make the filename safe, remove unsupported chars
            filename = secure_filename(uploaded.filename)
            # Move the file from the temporary folder to the upload folder.
            # NOTE(review): this statement was truncated in the source and
            # has been reconstructed as the usual Flask save call; confirm
            # that ``app`` is the Flask application object in this module.
            uploaded.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
            # Save the filename into a list, it is passed to the template
            filenames.append(filename)
    # Load an html page with a link to each uploaded file
    return render_template('upload.html', filenames=filenames, tags=tags)
Beispiel #2
def Get_TweetTags(data,no_tags,multi_tag_len, dict_path=None):
    """Return the tags the tagger extracts from ``data``.

    Args:
        data: Input handed to the tagger as its first argument.
        no_tags: Handed to the tagger as its second argument (tag count).
        multi_tag_len: Second argument of ``tagger.Rater``.
        dict_path: Path of a pickled weights dictionary. When ``None`` the
            default dictionary below ``BASE_DIR`` is loaded instead.

    Returns:
        Whatever the tagger call returns for ``data``.
    """
    # The original loaded the default dictionary unconditionally inside the
    # ``if`` branch: a custom dictionary was always overwritten and the
    # default case failed with NameError. Pick exactly one path instead.
    if dict_path is None:
        dict_path = BASE_DIR+'/Summarizer_Tagger/data/dict.pkl'  # default dictionary
    with open(dict_path, 'rb') as weights_file:
        weights = pickle.load(weights_file)

    myreader = tagger.Reader() # or your own reader class
    mystemmer = tagger.Stemmer() # or your own stemmer class
    myrater = tagger.Rater(weights,multi_tag_len) # or your own... (you got the idea)
    mytagger = Tagger(myreader, mystemmer, myrater)
    return mytagger(data, no_tags)
Beispiel #3
# install Goose
# Done so far: basic keyword extraction using tagger works.
# Concerns about keyword extraction using Tagger library:
# - dictionary should be built from corpora relevant to the article to be
# 	more effective at attracting attention in an immersive interface
# - TF-IDF is a function provided in the module build_dict... if articles
# 	in collection ever accumulate enough around one subject, use TF-IDF
# immediate todos:
# - implement multitag

from goose import Goose
import tagger
import pickle

# Left empty in this snippet: set it to the article URL before running.
url = ""
g = Goose()
article = g.extract(url=url).cleaned_text

# Close the dictionary file deterministically instead of leaking the handle.
with open('data/dict.pkl', 'rb') as weights_file:  # or your own dictionary
    weights = pickle.load(weights_file)
# NOTE(review): the third constructor argument was truncated in the source;
# a Rater built from the loaded weights is the only use ``weights`` can have
# here -- confirm against the original script.
mytagger = tagger.Tagger(tagger.Reader(), tagger.Stemmer(),
                         tagger.Rater(weights))
# The variable name is historical: six tags are requested, not three.
best_3_tags = mytagger(article, 6)
print(best_3_tags)
Beispiel #4
def Summarizerr_old(file_name,no_tags,no_line_whole,no_line_para,keyword=None):
    """Build a text report with tags, a summary and size statistics.

    Args:
        file_name: Despite the name, this value is used directly as the
            input of the tagger and the summarizer, and its length is
            reported as the original length.
        no_tags: Passed to the tagger as its second argument (tag count).
        no_line_whole: Forwarded to ``SummaryTool.get_summary``.
        no_line_para: Forwarded to ``SummaryTool.get_summary``.
        keyword: Optional value forwarded to ``SummaryTool.get_summary``.

    Returns:
        A string made of the tag line, the summary and the length
        statistics. The progress output is also printed to stdout.
    """
    import os
    import pickle

    import tagger
    from Summarizer import SummaryTool
    from TextSummarization import settings

    data = file_name
    path = os.path.join(settings.PROJECT_ROOT,'static/dict.pkl')
    print(path)
    # Close the dictionary file deterministically instead of leaking it.
    with open(path, 'rb') as weights_file:  # or your own dictionary
        weights = pickle.load(weights_file)
    print(no_tags)
    myrater = tagger.Rater(weights,3)
    mytagger = tagger.Tagger(tagger.Reader(), tagger.Stemmer(), myrater)
    best_tags = mytagger(data, no_tags)
    print(best_tags)
    print("\n")

    tag_text = ''.join(str(tag).title().replace("'"," ") for tag in best_tags)
    body = SummaryTool().get_summary(data,no_line_para,no_line_whole,keyword)
    summary = 'Tags :' + tag_text + '\n\n' + "Summary :\n\n" + body

    # Measure the summary once, before the statistics are appended, so the
    # reported length and the ratio describe the same text. The original
    # re-measured after appending and reported a larger, inconsistent size.
    original_length = len(data)
    summary_length = len(summary)
    # Empty input would otherwise raise ZeroDivisionError.
    if original_length:
        ratio = 100*(float(summary_length) / original_length)
    else:
        ratio = 0.0

    stats = "\n\nOriginal Length :%s\nSummary Length :%s\nSummary Ratio :%s%%" % (
        original_length, summary_length, ratio)

    print("")
    print("Original Length %s" % original_length)
    print("Summary Length %s" % summary_length)
    print("Summary Ratio: %s %%" % ratio)
    return summary + stats


def clean_str(string):
    """Separate contractions and punctuation from words and squeeze spaces.

    NOTE(review): the ``def`` line of this helper was missing from the
    snippet, which left these statements as unreachable code behind the
    ``return`` of the previous function. Name and parameter are taken from
    the body and from the ``clean_str(a)`` call later in the file; any
    substitutions that preceded these ones may have been lost as well.

    Args:
        string: Text to normalise.

    Returns:
        The text with ``'ve``, ``n't``, ``'re``, ``'d``, ``'ll``, commas,
        exclamation marks, parentheses and question marks set off by
        spaces, whitespace runs collapsed and the ends stripped.
    """
    # (pattern, replacement) pairs, applied in order. The replacements for
    # parentheses and the question mark keep their backslash on purpose:
    # re.sub leaves an unknown backslash-punctuation escape untouched, so
    # the output really contains "\(", "\)" and "\?", as it always did.
    substitutions = (
        (r"\'ve", " 've"),
        (r"n\'t", " n't"),
        (r"\'re", " 're"),
        (r"\'d", " 'd"),
        (r"\'ll", " 'll"),
        (r",", " , "),
        (r"!", " ! "),
        (r"\(", r" \( "),
        (r"\)", r" \) "),
        (r"\?", r" \? "),
        (r"\s{2,}", " "),
    )
    for pattern, replacement in substitutions:
        string = re.sub(pattern, replacement, string)
    return string.strip()

# Build the tagger from the pickled weights dictionary; the ``with`` block
# closes the file instead of leaking the handle.
with open('data/dict.pkl', 'rb') as weights_file:  # or your own dictionary
    weights = pickle.load(weights_file)
myreader = tagger.Reader()  # or your own reader class
mystemmer = tagger.Stemmer()  # or your own stemmer class
myrater = tagger.Rater(weights)  # or your own... (you got the idea)
mytagger = Tagger(myreader, mystemmer, myrater)
tags = []
f = pd.read_csv('articlesabsctracts.csv', delimiter=';')
# Drop the literal headings "Abstract" and "Summary" from every row. The
# original loops stopped at len - 1 and so never cleaned the last abstract,
# and they wrote through chained indexing (f['abstract'][i] = ...), which
# pandas does not guarantee to propagate to the frame. Assigning the whole
# column avoids both problems.
f['abstract'] = [
    text.replace('Abstract', '').replace('Summary', '')
    for text in f['abstract']
]
abstracts = [clean_str(a) for a in f['abstract']]
abstracts = [wordpunct_tokenize(a) for a in abstracts]
morph = pymorphy2.MorphAnalyzer()
for i in range(len(abstracts)):
Beispiel #6
import os
import pickle
import tagger

# The weights dictionary lives in the data directory next to this package.
datafile = os.path.join(os.path.dirname(__file__), '..', 'data/dict.pkl')
# Close the dictionary file deterministically instead of leaking the handle.
with open(datafile, 'rb') as weights_file:
    weights = pickle.load(weights_file)
rdr = tagger.Reader()
stmr = tagger.Stemmer()
rtr = tagger.Rater(weights)

# Ready-to-use tagger built from the reader, stemmer and rater above.
extract_tags = tagger.Tagger(rdr, stmr, rtr)