Exemple #1
0
def home():
    try:
        return render_template('home.html', topicsList = [])
    except:
        message = 'Error encountered for this data: ' + str(sys.exc_info()[0])
        Logger.error(os.path.basename(__file__), message)
        return render_template('home.html', topicsList = [])
Exemple #2
0
def callback():
    try:
        with open("core/storeURL.txt",'w') as f:
            f.write(request.form['input'] + "\n")
        return render_template('home.html', topicsList = [])
    except:
        message = 'Error encountered for this data: ' + str(sys.exc_info()[0])
        Logger.error(os.path.basename(__file__), message)
        return render_template('home.html', topicsList = [])
Exemple #3
0
def getdata():
    try:
        with open("core/storeURL.txt",'r') as f:
            a = list(processObj.top_keywords_from_url(f.readline().strip(),10).to_dict()["index"].values())
            if not a:
                return render_template('home.html', topicsList = ErrorMessage)
        return render_template('home.html', topicsList = a)
    except NameError as err:
        message = 'NameError in this data: ' + str(err)
        Logger.error(os.path.basename(__file__), message)
        return render_template('home.html', topicsList = ErrorMessage)
    except TypeError as err:
        message = 'TypeError in this data: ' + str(err)
        Logger.error(os.path.basename(__file__), message)
        return render_template('home.html', topicsList = ErrorMessage)
    except ValueError as err:
        message = 'ValueError in this data: ' + str(err)
        Logger.error(os.path.basename(__file__), message)
        return render_template('home.html', topicsList = ErrorMessage)
    except AttributeError as err:
        message = 'AttributeError in this data: ' + str(err)
        loggerObj.error(os.path.basename(__file__), message)
        return render_template('home.html', topicsList = ErrorMessage)
    except:
        message = 'Error encountered for this data: ' + str(sys.exc_info()[0])
        Logger.error(os.path.basename(__file__), message)
        return render_template('home.html', topicsList = ErrorMessage)
class Processing:
    def __init__(self):
        self.loggerObj = Logger()

    def top_keywords_from_url(self, url, no_of_relevant_topics):
        try:
            REMOVE = {'style', 'script', 'head', 'title', '[document]'}

            def visible_texts(soup):
                return re.compile(r'\s{3,}').sub(
                    '  ', ' '.join([
                        s for s in soup.strings if s.parent.name not in REMOVE
                    ]))

            html = request.urlopen(url).read()
            soup = BeautifulSoup(html, "lxml")
            text = visible_texts(soup)
            lmtzr = WordNetLemmatizer()
            word_tokens = word_tokenize(text)
            stop_words = set(stopwords.words('english'))
            filtered_sentence = [
                lmtzr.lemmatize(w.lower()) for w in word_tokens
                if not w in stop_words and len(w) >= 4
            ]
            Bigram = list(zip(filtered_sentence[::2], filtered_sentence[1::2]))
            countUnigram = Counter(filtered_sentence)
            countBigram = Counter(
                [i[0].strip() + " " + i[1].strip() for i in Bigram])

            df_unigram = DataFrame.from_dict(
                countUnigram,
                orient='index').sort_values(by=0,
                                            ascending=False).reset_index()
            df_bigram = DataFrame.from_dict(countBigram,
                                            orient='index').sort_values(
                                                by=0,
                                                ascending=False).reset_index()
            bigram_weight = df_unigram[0][0] / df_bigram[0][0]
            df_bigram[0] = df_bigram[0] * bigram_weight
            return df_unigram.append(df_bigram).sort_values(
                by=0,
                ascending=False).reset_index(drop=True)[:no_of_relevant_topics]
        except NameError as err:
            message = 'NameError in this data: ' + str(err)
            self.loggerObj.error(os.path.basename(__file__), message)
            return []
        except TypeError as err:
            message = 'TypeError in this data: ' + str(err)
            self.loggerObj.error(os.path.basename(__file__), message)
            return []
        except ValueError as err:
            message = 'ValueError in this data: ' + str(err)
            self.loggerObj.error(os.path.basename(__file__), message)
            return []
        except AttributeError as err:
            message = 'AttributeError in this data: ' + str(err)
            self.loggerObj.error(os.path.basename(__file__), message)
            return []
        except:
            message = 'Error encountered for this data: ' + str(
                sys.exc_info()[0])
            self.loggerObj.error(os.path.basename(__file__), message)
            return []