def process_feed(self):
    """Parse every feed in ``self._feed_list`` and hand new articles to the DB handle.

    For each feed the parsed result is kept in ``self._fburned`` and its
    ``url`` / ``version`` are logged.  Every item of the feed is stripped
    of HTML with ``stripper``, summarised with ``summarize`` (using
    ``self._kword_amt``), collected in ``self._articles`` and hashed with
    SHA-256.  When ``self._mongodb_handle._not_processed(hashed)`` is
    true the article frame is passed to
    ``self._mongodb_handle._insert_full``.

    Items that raise while being read or summarised are logged and
    skipped, so a broken item is never recorded with values left over
    from an earlier item.
    """
    for f in self._feed_list:
        self._link = f
        self._fburned = feedparser.parse(self._link)
        # grab the details from the burned feed
        self._furl = self._fburned['url']
        self._fversion = self._fburned['version']
        self._flang = ""
        self._log.info(
            "Processing articles for %s: %s, %s"
            % (self._furl, str(self._fversion), self._flang.strip()))
        self._articles = []
        for i in self._fburned['items']:
            self._aframe = {
                'title': None,
                'date': None,
                'link': None,
                'keywords': [],
                'feed': None,
                'language': None,
            }
            try:
                self._ititle = i['title']
                self._idate = i['published']
                self._ilink = i['link']
                # strip out HTML
                self._ctext = stripper(i['summary']).get_data()
                # summarize the article
                self._sum = summarize(self._ctext, self._kword_amt)
                self._kwords = self._sum.get_most_used_words()
            except Exception as e:
                print(str(e))
                # .get(): the failure may be the missing title itself, and
                # the handler must not raise a second time.
                self._log.error("Failed to process %s" % i.get('title'))
                # Skip the item; falling through would reuse the previous
                # item's title/date/link/keywords.
                continue
            self._log.debug(
                "Processed article: \ntitle:%s \ndate:%s \nlink:%s \nkeywords:%s"
                % (self._ititle, self._idate, self._ilink, self._kwords))
            self._aframe['title'] = self._ititle
            self._aframe['date'] = self._idate
            self._aframe['link'] = self._ilink
            self._aframe['keywords'] = self._kwords
            self._aframe['feed'] = self._furl
            self._aframe['language'] = self._flang
            # hash the contents to check in DB; encode so hashlib receives
            # bytes on Python 3 as well
            hashed = hashlib.sha256(
                str(self._aframe).encode('utf-8')).hexdigest()
            self._aframe['hashed'] = hashed
            self._articles.append(self._aframe)
            if self._mongodb_handle._not_processed(hashed):
                self._log.info("Adding %s (%s)" % (self._ititle, hashed))
                self._mongodb_handle._insert_full(self._aframe)
def process_feed(self):
    """Walk ``self._feed_list``, summarise each feed item and store unseen ones.

    Each feed is parsed with ``feedparser.parse``.  For every item the
    summary is stripped of HTML (``stripper``), reduced to its most used
    words (``summarize`` with ``self._kword_amt``), put into an article
    frame together with the feed URL and language, and hashed with
    SHA-256.  The frame is appended to ``self._articles`` and, if
    ``self._mongodb_handle._not_processed(hashed)`` returns true, passed
    to ``self._mongodb_handle._insert_full``.

    An item that fails while being read or summarised is logged and
    skipped rather than stored with data from a previous item.
    """
    for f in self._feed_list:
        self._link = f
        self._fburned = feedparser.parse(self._link)
        # grab the details from the burned feed
        self._furl = self._fburned['url']
        self._fversion = self._fburned['version']
        self._flang = ""
        self._log.info("Processing articles for %s: %s, %s"
                       % (self._furl, str(self._fversion), self._flang.strip()))
        self._articles = []
        for i in self._fburned['items']:
            self._aframe = {'title': None, 'date': None, 'link': None,
                            'keywords': [], 'feed': None, 'language': None}
            try:
                self._ititle = i['title']
                self._idate = i['published']
                self._ilink = i['link']
                self._ctext = stripper(i['summary']).get_data()  # strip out HTML
                self._sum = summarize(self._ctext, self._kword_amt)  # summarize the article
                self._kwords = self._sum.get_most_used_words()
            except Exception as e:
                print(str(e))
                # The missing key may be 'title' itself, so look it up safely.
                self._log.error("Failed to process %s" % i.get('title'))
                # Do not fall through: the attributes below would still hold
                # the previous item's values.
                continue
            self._log.debug("Processed article: \ntitle:%s \ndate:%s \nlink:%s \nkeywords:%s"
                            % (self._ititle, self._idate, self._ilink, self._kwords))
            self._aframe.update({
                'title': self._ititle,
                'date': self._idate,
                'link': self._ilink,
                'keywords': self._kwords,
                'feed': self._furl,
                'language': self._flang,
            })
            # hash the contents to check in DB (bytes are required by
            # hashlib on Python 3)
            frame_text = str(self._aframe)
            hashed = hashlib.sha256(frame_text.encode('utf-8')).hexdigest()
            self._aframe['hashed'] = hashed
            self._articles.append(self._aframe)
            if self._mongodb_handle._not_processed(hashed):
                self._log.info("Adding %s (%s)" % (self._ititle, hashed))
                self._mongodb_handle._insert_full(self._aframe)
def process_feed(self):
    """Parse every ``"link,language"`` entry of ``self._feed_list`` into a frame.

    Each entry is split on ``","`` into the feed link and its language.
    The feed is parsed with ``feedparser.parse``; every item is stripped
    of HTML (``stripper``), summarised (``summarize`` with
    ``self._kword_amt``) and appended to ``self._articles`` as a dict
    with ``title``, ``date``, ``link`` and ``keywords``.  After the items
    of a feed are handled, ``self._frame`` is filled with the feed URL,
    version, language and the collected articles.

    Items that raise while being read or summarised are logged and
    skipped, so no article is recorded with values from an earlier item.
    """
    for f in self._feed_list:
        self._frame = {'feed': None, 'version': None, 'language': None,
                       'articles': []}
        self._link, self._lang = f.split(",")
        self._lang = self._lang.strip()
        self._fburned = feedparser.parse(self._link)
        # grab the details from the burned feed
        self._furl = self._fburned['url']
        self._fversion = self._fburned['version']
        self._flang = self._lang
        self._log.debug("Processing articles for %s: %s, %s"
                        % (self._furl, str(self._fversion), self._flang.strip()))
        self._articles = []
        for i in self._fburned['items']:
            self._aframe = {'title': None, 'date': None, 'link': None,
                            'keywords': []}
            try:
                self._ititle = i['title']
                self._idate = i['published']
                self._ilink = i['link']
                self._ctext = stripper(i['summary']).get_data()  # strip out HTML
                self._sum = summarize(self._ctext, self._kword_amt)  # summarize the article
                self._kwords = self._sum.get_most_used_words()
            except Exception:
                # .get(): the item may be failing because 'title' is absent.
                self._log.error("Failed to process %s" % i.get('title'))
                # Skip it; continuing would append the previous item's data.
                continue
            self._log.debug("Processed article: \ntitle:%s \ndate:%s \nlink:%s \nkeywords:%s"
                            % (self._ititle, self._idate, self._ilink, self._kwords))
            self._aframe['title'] = self._ititle
            self._aframe['date'] = self._idate
            self._aframe['link'] = self._ilink
            self._aframe['keywords'] = self._kwords
            self._articles.append(self._aframe)
        # NOTE(review): self._frame is rebuilt for every feed and nothing
        # visible in this method consumes it -- confirm where it is used.
        self._frame['feed'] = self._furl
        self._frame['version'] = self._fversion
        self._frame['language'] = self._flang
        self._frame['articles'] = self._articles
def main2(debug=False, sent_limit=3, lambda_=0.7):
    """Train a TF-IDF model on the database file and print a summary of the report.

    The documents from ``../data/database.txt`` are turned into a corpus,
    a ``TfidfModel`` is generated from it, and the resulting dictionary
    and model are saved under ``../data``.  ``../data/report.txt`` is
    then preprocessed and passed to ``summarize``; the sentences at the
    returned indexes are printed in ascending index order.

    Args:
        debug: Accepted but not used by this function.
        sent_limit: Forwarded to ``summarize`` as ``sent_limit``.
        lambda_: Forwarded to ``summarize`` as ``lambda_``.
    """
    documents = load_data("../data/database.txt")
    corpus = make_corpus(documents)
    model, dictionary = TfidfModel().generate(corpus)

    dictionary.save_as_text("../data/dict.txt")
    model.save("../data/model.model")

    # Alternative kept from the original: load previously saved artifacts
    # instead of regenerating them.
    #   dictionary = gensim.corpora.Dictionary.load_from_text("../data/dict.txt")
    #   model = gensim.models.TfidfModel.load("../data/model.model")

    report_text = read_file("../data/report.txt")
    sentences, report_corpus = preprocess_target(report_text)
    chosen = summarize(
        report_corpus,
        model,
        dictionary,
        sent_limit=sent_limit,
        lambda_=lambda_,
    )
    for position in sorted(chosen):
        print(sentences[position])
def _format_volumes(volumes):
    """Return the text printed after a shape label for its list of volumes."""
    if not volumes:
        return "No Shapes Entered."
    # The leading space is kept so the printed lines match the original layout.
    return " " + ", ".join(str(volume) for volume in volumes)


def main():
    """Interactively collect shape volumes and print a sorted summary.

    Asks for the test case number, then repeatedly prompts for a shape
    (cube, pyramid, ellipsoid) until the user enters "q" or "quit".  The
    value returned by the matching volume function is stored per shape.
    If nothing was calculated a closing message is printed; otherwise the
    volumes of each shape are printed in increasing order and passed,
    with the test case number, to ``summarize``.
    """
    cubeIndex = []
    pyramidIndex = []
    ellipsoidIndex = []
    testCase = int(input("What is the test case number?: "))

    # Keep prompting until the user types "q" or "quit".
    while True:
        # Normalise once; surrounding whitespace should not make a valid
        # choice invalid.
        user = input(
            "Enter Cube/c, Pyramid/p, Ellipsoid/e, Quit/q: ").strip().lower()
        if user in ("q", "quit"):
            break
        elif user in ("cube", "c"):
            cubeIndex.append(cubeVol())
        elif user in ("pyramid", "p"):
            pyramidIndex.append(pyramidVol())
        elif user in ("ellipsoid", "e"):
            ellipsoidIndex.append(ellipsoidVol())
        else:
            print("Invalid input")

    # Nothing was calculated for any shape.
    if not (cubeIndex or pyramidIndex or ellipsoidIndex):
        print(
            "You have reached the end of your session. You did not perform any volume calculations. "
        )
        return

    # Report each shape's volumes in increasing order.
    cubeIndex = sorted(cubeIndex)
    pyramidIndex = sorted(pyramidIndex)
    ellipsoidIndex = sorted(ellipsoidIndex)

    print(
        "You have reached the end of your session. The volumes calculated for each shape are: "
    )
    print("Cube:", _format_volumes(cubeIndex))
    print("Pyramid:", _format_volumes(pyramidIndex))
    print("Ellipsoid:", _format_volumes(ellipsoidIndex))
    # NOTE(review): the original layout was ambiguous; summarize is called
    # here only when at least one volume was calculated -- confirm.
    summarize(cubeIndex, pyramidIndex, ellipsoidIndex, testCase)