Example #1
    def __init__(self):
        super(Control_Panel, self).__init__()

        self.p = Parameters()

        self.connection = None

        self.setupUi(self)

        self.ParamFileModel = QtWidgets.QFileSystemModel(self.ParamList)
        self.ParamFileModel.setReadOnly(True)
        self.ParamFileModel.removeColumns(1, 2)

        root = self.ParamFileModel.setRootPath('save/params')
        self.ParamList.setModel(self.ParamFileModel)
        self.ParamList.setRootIndex(root)

        self.ParamList.clicked.connect(
            lambda: self.ParamLoadButton.setEnabled(True))

        # Connections
        self.ParamLoadButton.clicked.connect(lambda: self.load_params())
        self.ParamTree.itemChanged.connect(lambda: self.update_value())
        self.ConnectButton.clicked.connect(lambda: self.mongo_update())
        self.ScraperLoad.clicked.connect(lambda: self.scraper_config_load())
        self.ScraperDepthSlider.sliderReleased.connect(
            lambda: self.depth_slider())
        self.UseLocalCheckBox.toggled.connect(
            lambda: self.use_local_check_box())
        self.UpsertCheckBox.toggled.connect(lambda: self.upsert_check_box())
        self.ScraperButton.clicked.connect(lambda: self.run_scraper())
        self.NewlineCheckBox.toggled.connect(lambda: self.newline_check_box())
        self.PunctuationCheckBox.toggled.connect(
            lambda: self.puncuation_check_box())
        self.EmailsCheckBox.toggled.connect(lambda: self.emails_check_box())
        self.ContradictionsCheckBox.toggled.connect(
            lambda: self.contradictions_check_box())
        self.AccentsCheckBox.toggled.connect(lambda: self.accents_check_box())
        self.CurrencyCheckBox.toggled.connect(
            lambda: self.currency_check_box())
        self.FixUnicodeCheckBox.toggled.connect(
            lambda: self.unicode_check_box())
        self.LowercaseCheckBox.toggled.connect(
            lambda: self.lowercase_check_box())
        self.VisualizeButton.clicked.connect(lambda: self.visualizer_start())
Example #2
    def __init__(self, settings=None, mongo_cfg=None):

        self.settings = settings
        self.mongo_cfg = mongo_cfg

        self.mongo = Parameters()
        self.mongo.loader('dat/mongo.secret', 'server')
        self._mongo = self.mongo.server.Mongo_DB_Server_Params

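        # Open the MongoDB connection with the credentials loaded from dat/mongo.secret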
        self.client = MongoClient(host=self._mongo.host,
                                  port=self._mongo.port,
                                  username=self._mongo.user,
                                  password=self._mongo.password)
        self.db = getattr(self.client, self._mongo.db)
        self.collection = getattr(self.db, self._mongo.collection)

        self.query_df = None
        self.query_dict = {}
        self.added_count = None
Example #3
    def update_from_df(self,
                       df=None,
                       drop_id=None,
                       upsert=None,
                       set_local=None,
                       verbose=False):

        if df is None:
            df = self.query_df
        if not drop_id:
            drop_id = self.mongo_cfg.Options.Set_Local
        if not upsert:
            upsert = self.mongo_cfg.Options.Upsert
        if not set_local:
            set_local = self.mongo_cfg.Options.Set_Local

        if drop_id:
            df = df.drop(['_id'], axis=1)

        data = df.to_dict(orient='records')

        old_count = self.collection.count()

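        # Update each post matched on link + subreddit, inserting new rows when upsert is set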
        for post in data:
            self.collection.update_one(
                {
                    'link': post['link'],
                    'subreddit': post['subreddit']
                }, {'$set': post},
                upsert=upsert)

        new_count = self.collection.count()

        self.added_count = new_count - old_count

        if verbose:
            print(f'Added {self.added_count} Entries to Database')

        if set_local:
            time_stamp = self.utc_to_pacific(datetime.utcnow()).ctime()
        else:
            time_stamp = datetime.now().ctime()

        log = Parameters()
        log.loader('log/mongo.log', default=True)

        log.loaded.MONGOLOG.Date = time_stamp
        log.loaded.MONGOLOG.Added = self.added_count
        log.loaded.MONGOLOG.Total = new_count

        log.writer('log/scraper.log', log.loaded, append=True)
Example #4
    list_of_terms.append(term_dict)

mongo = Connect()
client = mongo.client

collection = client.Politiprocess.terms

start_count = collection.count()

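# Upsert each term document, keyed on its timestamp, so reruns do not duplicate entries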
for entry in list_of_terms:
    collection.update_one({"timestamp": entry['timestamp']},
                          {'$set': entry},
                          upsert=True,
                          bypass_document_validation=True)

end_count = collection.count()

added_count = end_count - start_count

if set_local:
    time_now = utc_to_pacific(datetime.utcnow()).ctime()
else:
    time_now = datetime.now().ctime()


log = Parameters()
log.loader('log/JSON.log', default=True)

log.loaded.JSONLOG.Date    = time_now
log.loaded.JSONLOG.Added   = added_count
log.loaded.JSONLOG.Total   = end_count

log.writer('log/scraper.log', log.loaded, append=True)
Example #5
class Connect:
    def __init__(self, settings=None, mongo_cfg=None):

        self.settings = settings
        self.mongo_cfg = mongo_cfg

        self.mongo = Parameters()
        self.mongo.loader('dat/mongo.secret', 'server')
        self._mongo = self.mongo.server.Mongo_DB_Server_Params

        self.client = MongoClient(host=self._mongo.host,
                                  port=self._mongo.port,
                                  username=self._mongo.user,
                                  password=self._mongo.password)
        self.db = getattr(self.client, self._mongo.db)
        self.collection = getattr(self.db, self._mongo.collection)

        self.query_df = None
        self.query_dict = {}
        self.added_count = None

    def utc_to_pacific(self, utc_dt):
        local_tz = pytz.timezone('America/Los_Angeles')
        os.environ['TZ'] = 'America/Los_Angeles'
        local_dt = utc_dt.replace(tzinfo=pytz.utc).astimezone(local_tz)
        return local_tz.normalize(local_dt)

    def load_all(self):
        self.query_df = pd.DataFrame(list(self.collection.find()))

    def query(self,
              red_or_blue=None,
              articles=None,
              n_hours=None,
              count=None,
              custom_query=None,
              append_dfs=False,
              verbose=False):

        self.query_dict = {}

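        # Fall back to the Query settings for any argument not passed in;
        # an explicit count overrides the hourly time frame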
        if not red_or_blue:
            red_or_blue = self.settings.Query.Red_Blue_or_All
        if not articles:
            articles = self.settings.Query.Articles_Only
        if not n_hours:
            n_hours = self.settings.Query.Time_Frame_in_Hours
        if not append_dfs:
            append_dfs = self.settings.Query.Append_DFs
        if not count:
            count = self.settings.Query.Count
        else:
            n_hours = 0

        if articles:
            self.query_dict['is article'] = articles

        if articles:
            post = 'articles'
        else:
            post = 'documents'

        if not n_hours:
            if verbose:
                print(f"Pulling {count} {post} from {red_or_blue} targets.")
        else:
            if verbose:
                print(
                    f"Pulling {red_or_blue} articles from last {n_hours} hours."
                )
            dt = datetime.utcnow() - timedelta(hours=n_hours)
            self.query_dict['date'] = {'$gt': dt}

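        # Map the Red/Blue selection onto the boolean 'target' field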
        if red_or_blue == 'Red':
            self.query_dict['target'] = True
        elif red_or_blue == 'Blue':
            self.query_dict['target'] = False
        elif red_or_blue == 'All':
            self.query_dict['target'] = [True, False]

        if custom_query:
            self.query_dict = {**self.query_dict, **custom_query}

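        # 'All' pulls Red and Blue into separate DataFrames, optionally appended below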
        if self.query_dict.get('target') == [True, False]:
            self.query_dict['target'] = True
            self.red_df = pd.DataFrame(
                list(
                    self.collection.find(self.query_dict,
                                         sort=[('_id', -1)],
                                         limit=count)))
            self.red_df.name = 'Red'
            self.query_dict['target'] = False
            self.blue_df = pd.DataFrame(
                list(
                    self.collection.find(self.query_dict,
                                         sort=[('_id', -1)],
                                         limit=count)))
            self.blue_df.name = 'Blue'
        if append_dfs:
            self.query_df = self.red_df.append(self.blue_df)
            self.query_df.name = 'All'

        if verbose:
            latest = self.collection.find_one(sort=[('date', -1)])['date']
            print(f"Completed pulling {len(self.query_df)} {post}.\n"
                  f"Latest article is from {latest} UTC")

    def update_from_df(self,
                       df=None,
                       drop_id=None,
                       upsert=None,
                       set_local=None,
                       verbose=False):

        if df is None:
            df = self.query_df
        if not drop_id:
            drop_id = self.mongo_cfg.Options.Set_Local
        if not upsert:
            upsert = self.mongo_cfg.Options.Upsert
        if not set_local:
            set_local = self.mongo_cfg.Options.Set_Local

        if drop_id:
            df = df.drop(['_id'], axis=1)

        data = df.to_dict(orient='records')

        old_count = self.collection.count()

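        # Update each post matched on link + subreddit, inserting new rows when upsert is set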
        for post in data:
            self.collection.update_one(
                {
                    'link': post['link'],
                    'subreddit': post['subreddit']
                }, {'$set': post},
                upsert=upsert)

        new_count = self.collection.count()

        self.added_count = new_count - old_count

        if verbose:
            print(f'Added {self.added_count} Entries to Database')

        if set_local:
            time_stamp = self.utc_to_pacific(datetime.utcnow()).ctime()
        else:
            time_stamp = datetime.now().ctime()

        log = Parameters()
        log.loader('log/mongo.log', default=True)

        log.loaded.MONGOLOG.Date = time_stamp
        log.loaded.MONGOLOG.Added = self.added_count
        log.loaded.MONGOLOG.Total = new_count

        log.writer('log/scraper.log', log.loaded, append=True)

    def count(self, query=None, red_or_blue=None):
        count_query = {}
        if red_or_blue == 'Red':
            count_query['target'] = True
        elif red_or_blue == 'Blue':
            count_query['target'] = False

        return self.collection.count(count_query)
Example #6
    def run(self, set_local=None, pickle=False, verbose=False):
        if not set_local:
            set_local = self.settings.Options.Set_Local

        if verbose:
            print('Starting Scraper')

        start_time = datetime.now()

        reddit = self.settings.Reddit_Params
        art_ignore = self.settings.Article.None_Article_Links

        API = Parameters()
        API.loader('dat/praw.secret')
        API = API.loaded.API_Script_Keys

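        # Authenticate to Reddit through PRAW with the script-type API keys loaded above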
        api = praw.Reddit(client_id=API.client_id,
                          client_secret=API.client_secret,
                          password=API.password,
                          user_agent=API.user_agent,
                          username=API.username)

        posts_dict = {
            "post title": [],
            "subreddit": [],
            "score": [],
            "is article": [],
            "article title": [],
            "title polarity": [],
            "title objectivity": [],
            "keywords": [],
            "domain": [],
            "link": [],
            "author": [],
            "text": [],
            "comments": [],
            "date": [],
            "target": [],
        }

        article_count = 0
        invalid_links = 0
        failed_links_c = 0
        failed_links = []
        red_sub = 0
        blue_sub = 0

        if verbose:
            print("Pulling Articles")

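        # Walk the hot listing of every Red and Blue subreddit, skipping stickied posts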
        for sub in reddit.Red_List + reddit.Blue_List:
            submissions = (x for x in api.subreddit(sub).hot(
                limit=reddit.Scraper_Depth_Limit) if not x.stickied)

            for post in submissions:

                if sub in reddit.Red_List:
                    posts_dict["target"].append(True)
                    red_sub += 1
                if sub in reddit.Blue_List:
                    blue_sub += 1
                    posts_dict["target"].append(False)

                posts_dict["post title"].append(
                    post.title)  # praw reddit scraping to dict
                posts_dict["link"].append(post.url)
                posts_dict["score"].append(int(post.score))
                posts_dict["subreddit"].append(sub)
                posts_dict["date"].append(
                    datetime.fromtimestamp(post.created_utc))

                comments = []  # Comments parsing and scoring
                for comment in post.comments:
                    try:
                        if comment.author != 'AutoModerator':
                            comments.append(
                                (round(comment.score / (post.num_comments),
                                       2), comment.body))
                    except Exception:
                        pass
                posts_dict["comments"].append(comments)

                parsed_url = urlparse(post.url)  # Parse URL for domain
                posts_dict['domain'].append(parsed_url.netloc)

                post_blob = TextBlob(post.title)  # TextBlob NLP - VERY SIMPLE
                posts_dict["title polarity"].append(post_blob.sentiment[0])
                posts_dict["title objectivity"].append(post_blob.sentiment[1])
                posts_dict["keywords"].append(post_blob.noun_phrases)

                article = Article(post.url)  # Instantiate newspaper3k library
                if article.is_valid_url(
                ) and parsed_url.netloc not in art_ignore:

                    try:  # Try to download and parse article
                        article.download()
                        article.parse()

                        article_count += 1
                        posts_dict["is article"].append(True)

                        if article.title:  # Title parsed?
                            posts_dict["article title"].append(article.title)
                        else:
                            posts_dict["article title"].append(np.nan)

                        if article.authors:  # Author parsed?
                            posts_dict["author"].append(article.authors)
                        else:
                            posts_dict["author"].append(np.nan)

                        if article.text:  # Text parsed?
                            posts_dict["text"].append(article.text)
                        else:
                            posts_dict["text"].append(np.nan)

                    except Exception:
                        posts_dict["is article"].append(False)
                        posts_dict["article title"].append(np.nan)
                        posts_dict["author"].append(np.nan)
                        posts_dict["text"].append(np.nan)
                        failed_links_c += 1
                        failed_links.append(post.url)

                else:
                    invalid_links += 1
                    posts_dict["is article"].append(False)
                    posts_dict["article title"].append(np.nan)
                    posts_dict["author"].append(np.nan)
                    posts_dict["text"].append(np.nan)

        if set_local:  # Stamp the log in local (Pacific) time
            time_now = self.utc_to_pacific(datetime.utcnow())
        else:
            time_now = datetime.now()
        log_date = time_now.strftime('%m%d%y_%H%M')

        if verbose:
            print("Generating DataFrame")

        posts_df = pd.DataFrame(posts_dict)  # Make it a dataframe
        posts_df = posts_df[[
            "subreddit", "post title", "title polarity", "title objectivity",
            "score", "keywords", "comments", "domain", "link", "is article",
            "article title", "author", "text", "date", "target"
        ]]

        if pickle:
            posts_df.to_pickle(f'log/{log_date}.pickle')

        z = datetime.now() - start_time
        self.scrape_time = f"{(z.seconds//60)%60}min, {z.seconds%60}sec"

        log = Parameters()
        log.loader('log/scraper.log', 'loaded', default=True)

        log.loaded.SCRAPERLOG.Date = time_now.ctime()
        log.loaded.SCRAPERLOG.Scraper_Timer = self.scrape_time
        log.loaded.SCRAPERLOG.Article_Count = article_count
        log.loaded.SCRAPERLOG.Invalid_Links = invalid_links
        log.loaded.SCRAPERLOG.Failed_Links = failed_links
        log.loaded.SCRAPERLOG.Failed_Links_Count = failed_links_c
        log.loaded.SCRAPERLOG.Red_Sub_Count = red_sub
        log.loaded.SCRAPERLOG.Blue_Sub_Count = blue_sub

        log.writer('log/scraper.log', log.loaded, append=True)
        log.writer('log/scraper.log', self.settings, append=True)

        self.scraper_df = posts_df
Example #7
parser = argparse.ArgumentParser(description='Settings for scraper_script')
parser.add_argument('-v',
                    '--verbose',
                    help='Use for verbose output to console.',
                    action='store_true')
parser.add_argument('-sl',
                    '--set_local',
                    help='Use for setting local time.',
                    action='store_true')

args = parser.parse_args()

verbose = args.verbose
set_local = args.set_local

p = Parameters()

p.loader('save/params/default.params', 'params')
p.loader('dat/scraper.cfg', 'scraper')

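# Scrape, pre-process, and spaCy-process the posts, then open the MongoDB connection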
scraper = Scraper(p.scraper)

scraper.run(verbose=verbose, set_local=set_local)

processor = Processing(p.scraper)

processor.pre_processor(scraper.scraper_df)

processor.spacy_processor(scraper.scraper_df, verbose=verbose)

connection = Connect(settings=p.params, mongo_cfg=p.scraper)
Example #8
class Control_Panel(base, ui):
    def __init__(self):
        super(Control_Panel, self).__init__()

        self.p = Parameters()

        self.connection = None

        self.setupUi(self)

        self.ParamFileModel = QtWidgets.QFileSystemModel(self.ParamList)
        self.ParamFileModel.setReadOnly(True)
        self.ParamFileModel.removeColumns(1, 2)

        root = self.ParamFileModel.setRootPath('save/params')
        self.ParamList.setModel(self.ParamFileModel)
        self.ParamList.setRootIndex(root)

        self.ParamList.clicked.connect(
            lambda: self.ParamLoadButton.setEnabled(True))

        # Connections
        self.ParamLoadButton.clicked.connect(lambda: self.load_params())
        self.ParamTree.itemChanged.connect(lambda: self.update_value())
        self.ConnectButton.clicked.connect(lambda: self.mongo_update())
        self.ScraperLoad.clicked.connect(lambda: self.scraper_config_load())
        self.ScraperDepthSlider.sliderReleased.connect(
            lambda: self.depth_slider())
        self.UseLocalCheckBox.toggled.connect(
            lambda: self.use_local_check_box())
        self.UpsertCheckBox.toggled.connect(lambda: self.upsert_check_box())
        self.ScraperButton.clicked.connect(lambda: self.run_scraper())
        self.NewlineCheckBox.toggled.connect(lambda: self.newline_check_box())
        self.PunctuationCheckBox.toggled.connect(
            lambda: self.puncuation_check_box())
        self.EmailsCheckBox.toggled.connect(lambda: self.emails_check_box())
        self.ContradictionsCheckBox.toggled.connect(
            lambda: self.contradictions_check_box())
        self.AccentsCheckBox.toggled.connect(lambda: self.accents_check_box())
        self.CurrencyCheckBox.toggled.connect(
            lambda: self.currency_check_box())
        self.FixUnicodeCheckBox.toggled.connect(
            lambda: self.unicode_check_box())
        self.LowercaseCheckBox.toggled.connect(
            lambda: self.lowercase_check_box())
        self.VisualizeButton.clicked.connect(lambda: self.visualizer_start())

    # Functions
    def load_params(self):

        self.ParamTree.clear()

        file = self.ParamFileModel.data(self.ParamList.selectedIndexes()[0])

        self.p.loader(f"save/params/{file}", 'params')

        self.ParamTree.setEnabled(True)
        self.ParamLoadedLabel.setText(f"{file}")
        self.ParamTree.setHeaderLabels(['Section', 'Value'])

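        # Build the tree: one top-level item per config section, with editable children per key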
        for section, value in self.p.params_dict.items():
            # print(key)

            root = QtWidgets.QTreeWidgetItem(self.ParamTree, [section])
            root.setExpanded(True)

            for key, val in value.items():
                if isinstance(val, list):
                    item = QtWidgets.QTreeWidgetItem([key])

                    for thing in val:
                        item2 = QtWidgets.QTreeWidgetItem()
                        item2.setData(1, 2, str(thing))
                        item2.setFlags(item2.flags() | QtCore.Qt.ItemIsEditable)
                        item.addChild(item2)

                    root.addChild(item)
                    continue

                item = QtWidgets.QTreeWidgetItem([key])
                item.setData(1, 2, val)
                item.setFlags(item.flags() | QtCore.Qt.ItemIsEditable)
                root.addChild(item)

    def update_value(self):
        # if self.ParamTree.currentItem().
        value = self.ParamTree.currentItem().data(1, 2)
        name = self.ParamTree.currentItem().text(0)
        parent = self.ParamTree.currentItem().parent().text(0)

        self.ParamTree.currentItem().setForeground(
            1, QtGui.QBrush(QtGui.QColor("red")))

        self.p.params_dict[parent][name] = value

    def mongo_update(self):
        if not self.connection:
            self.connection = Connect()

        total = self.connection.count()
        red_count = self.connection.collection.count(query={'target': True})
        blue_count = self.connection.collection.count(query={'target': False})
        article_count = self.connection.collection.count(
            query={'is article': True})
        latest_article = self.connection.collection.find_one(
            sort=[('date', -1)])['date']
        if self.connection.added_count:
            self.AddedCount.display(self.connection.added_count)

        self.LatestArticleDate.setText(datetime.ctime(latest_article))
        self.ConnectButton.setStyleSheet('background-color: green')
        self.ConnectButton.setText('CONNECTED')

        self.TotalCount.display(total)
        self.RedCount.display(red_count)
        self.BlueCount.display(blue_count)
        self.ArticleCount.display(article_count)

    def scraper_config_load(self):

        self.p.loader("dat/scraper.cfg", 'scraper')

        self.ScraperDepthSlider.setEnabled(True)
        self.ScraperDepthNumber.setEnabled(True)
        self.UseLocalCheckBox.setEnabled(True)
        self.UpsertCheckBox.setEnabled(True)
        self.ScraperButton.setEnabled(True)
        self.ScraperLists.setEnabled(True)
        self.NewlineCheckBox.setEnabled(True)
        self.PunctuationCheckBox.setEnabled(True)
        self.EmailsCheckBox.setEnabled(True)
        self.ContradictionsCheckBox.setEnabled(True)
        self.AccentsCheckBox.setEnabled(True)
        self.CurrencyCheckBox.setEnabled(True)
        self.FixUnicodeCheckBox.setEnabled(True)
        self.LowercaseCheckBox.setEnabled(True)

        self.ScraperDepthSlider.setValue(
            self.p.scraper_dict['Reddit_Params']['Scraper_Depth_Limit'])
        self.UseLocalCheckBox.setChecked(
            self.p.scraper_dict['Options']['Set_Local'])
        self.UpsertCheckBox.setChecked(
            self.p.scraper_dict['Options']['Upsert'])

        self.NewlineCheckBox.setChecked(
            self.p.scraper_dict['Pre_Processing']['Remove_Newline'])
        self.PunctuationCheckBox.setChecked(
            self.p.scraper_dict['Pre_Processing']['Remove_Punctuation'])
        self.EmailsCheckBox.setChecked(
            self.p.scraper_dict['Pre_Processing']['Remove_Emails'])
        self.ContradictionsCheckBox.setChecked(
            self.p.scraper_dict['Pre_Processing']['Remove_Contradictions'])
        self.AccentsCheckBox.setChecked(
            self.p.scraper_dict['Pre_Processing']['Remove_Accents'])
        self.CurrencyCheckBox.setChecked(
            self.p.scraper_dict['Pre_Processing']['Replace_Currency'])
        self.FixUnicodeCheckBox.setChecked(
            self.p.scraper_dict['Pre_Processing']['Fix_Unicode'])
        self.LowercaseCheckBox.setChecked(
            self.p.scraper_dict['Pre_Processing']['All_Lowercase'])

        self.scraper_lists()

    def depth_slider(self):

        self.p.scraper_dict['Reddit_Params'][
            'Scraper_Depth_Limit'] = self.ScraperDepthSlider.value()

    def use_local_check_box(self):
        if self.UseLocalCheckBox.checkState() == 2:
            self.p.scraper_dict['Options']['Set_Local'] = True
        else:
            self.p.scraper_dict['Options']['Set_Local'] = False

    def upsert_check_box(self):
        if self.UpsertCheckBox.checkState() == 2:
            self.p.scraper_dict['Options']['Upsert'] = True
        else:
            self.p.scraper_dict['Options']['Upsert'] = False

    def newline_check_box(self):
        if self.NewlineCheckBox.checkState() == 2:
            self.p.scraper_dict['Pre_Processing']['Remove_Newline'] = True
        else:
            self.p.scraper_dict['Pre_Processing']['Remove_Newline'] = False

    def puncuation_check_box(self):
        if self.PunctuationCheckBox.checkState() == 2:
            self.p.scraper_dict['Pre_Processing']['Remove_Punctuation'] = True
        else:
            self.p.scraper_dict['Pre_Processing'][
                'Remove_Punctuation'] = False

    def emails_check_box(self):
        if self.EmailsCheckBox.checkState() == 2:
            self.p.scraper_dict['Pre_Processing']['Remove_Emails'] = True
        else:
            self.p.scraper_dict['Pre_Processing']['Remove_Emails'] = False

    def contradictions_check_box(self):
        if self.ContradictionsCheckBox.checkState() == 2:
            self.p.scraper_dict['Pre_Processing'][
                'Remove_Contradictions'] = True
        else:
            self.p.scraper_dict['Pre_Processing'][
                'Remove_Contradictions'] = False

    def accents_check_box(self):
        if self.AccentsCheckBox.checkState() == 2:
            self.p.scraper_dict['Pre_Processing']['Remove_Accents'] = True
        else:
            self.p.scraper_dict['Pre_Processing']['Remove_Accents'] = False

    def currency_check_box(self):
        if self.CurrencyCheckBox.checkState() == 2:
            self.p.scraper_dict['Pre_Processing']['Replace_Currency'] = True
        else:
            self.p.scraper_dict['Pre_Processing']['Replace_Currency'] = False

    def unicode_check_box(self):
        if self.FixUnicodeCheckBox.checkState() == 2:
            self.p.scraper_dict['Pre_Processing']['Fix_Unicode'] = True
        else:
            self.p.scraper_dict['Pre_Processing']['Fix_Unicode'] = False

    def lowercase_check_box(self):
        if self.LowercaseCheckBox.checkState() == 2:
            self.p.scraper_dict['Pre_Processing']['All_Lowercase'] = True
        else:
            self.p.scraper_dict['Pre_Processing']['All_Lowercase'] = False

    def scraper_lists(self):

        self.ScraperLists.clear()

        for x in self.p.scraper_dict:
            for section, value in self.p.scraper_dict[x].items():
                if isinstance(value, list):
                    root = QtWidgets.QTreeWidgetItem(self.ScraperLists,
                                                     [section])
                    root.setExpanded(False)

                    for thing in value:
                        item = QtWidgets.QTreeWidgetItem()
                        item.setData(1, 2, str(thing))
                        root.addChild(item)

    def run_scraper(self):
        self.p.linker(self.p.scraper_dict, 'scraper')

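        # Same pipeline as the scraper script: scrape, pre-process, spaCy, then push to MongoDB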
        scraper = Scraper(self.p.scraper)
        processing = Processing(self.p.scraper)

        scraper.run()

        self.ProgressBar.setValue(25)

        processing.pre_processor(scraper.scraper_df)

        self.ProgressBar.setValue(50)

        processing.spacy_processor(scraper.scraper_df)

        self.ProgressBar.setValue(75)

        self.connection = Connect(settings=None, mongo_cfg=self.p.scraper)
        self.connection.update_from_df(scraper.scraper_df)

        self.mongo_update()

        self.ProgressBar.setValue(100)

        # self.AddedCount.

    def visualizer_start(self):
        self.p.linker(self.p.params_dict, 'params')

        self.connection.settings = self.p.params
        self.connection.query()

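        # Topic-model and plot each side when both Red and Blue were queried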
        if self.connection.settings.Query.Red_Blue_or_All == 'All':
            red_topics = Topic_Modeler(self.connection.red_df, self.p.params)
            red_topics.topic_modeler()
            red_topics.visualizer()

            image1 = QtGui.QPixmap(red_topics.save)
            image1 = image1.scaledToWidth(600, QtCore.Qt.SmoothTransformation)
            self.RedPlotView.resize(600, image1.height())
            self.RedPlotView.setPixmap(image1)

            blue_topics = Topic_Modeler(self.connection.blue_df, self.p.params)
            blue_topics.topic_modeler()
            blue_topics.visualizer()

            image2 = QtGui.QPixmap(blue_topics.save)
            image2 = image2.scaledToWidth(600, QtCore.Qt.SmoothTransformation)
            self.BluePlotView.resize(600, image2.height())
            self.BluePlotView.setPixmap(image2)