Beispiel #1
0
 def __init__(self, arguments):
     """Derive per-query output paths and create the directories in use.

     ``arguments`` is indexed by ``'query_name'`` (appended to both base
     paths from SETTINGS) and ``'spider_name'``. The tweet directory is
     created only when the spider name is ``'TweetScraper'``; the user
     directory is always created.
     """
     self.saveTweetPath = SETTINGS['SAVE_TWEET_PATH'] + arguments['query_name']
     self.saveUserPath = SETTINGS['SAVE_USER_PATH'] + arguments['query_name']

     wants_tweet_dir = arguments['spider_name'] == 'TweetScraper'
     if wants_tweet_dir:
         mkdirs(self.saveTweetPath)
     # The user directory is created regardless of which spider is running.
     mkdirs(self.saveUserPath)
Beispiel #2
0
    def __init__(self):
        """Load the tweet and user output paths from the project settings.

        Both directories are passed to ``mkdirs`` so they exist before any
        item is saved.
        """
        project_settings = get_project_settings()

        self.saveTweetPath = project_settings["SAVE_TWEET_PATH"]
        self.saveUserPath = project_settings["SAVE_USER_PATH"]
        for directory in (self.saveTweetPath, self.saveUserPath):
            mkdirs(directory)
Beispiel #3
0
    def process_item(self, item, spider):
        """Persist a scraped Tweet or User item to its own file on disk.

        Tweets are written below ``self.saveTweetPath`` in a sub-directory
        built from ``item['location']`` and ``item['category']`` (minus its
        last four characters), in a file named ``item['filename']`` minus its
        last six characters. If that file already exists the item is appended
        to it, otherwise a new file is written.

        Users are written to ``self.saveUserPath`` in a file named after
        ``item['ID']``; an already existing user file is left untouched.

        Any other item type is only logged.

        Args:
            item: The scraped item.
            spider: The spider that produced the item (not used here).

        Returns:
            The unchanged item, so that later pipeline stages still receive
            it (Scrapy expects ``process_item`` to return the item).
        """
        if isinstance(item, Tweet):
            # Tweet-specific keys are only read for Tweet items, so User and
            # unknown items cannot fail here before reaching their own branch.
            tweet_dir = self.saveTweetPath + item['location'] + "/" + item['category'][:-4] + "/"
            mkdirs(tweet_dir)
            filename = item['filename'][:-6]
            self.savePath = os.path.join(tweet_dir, filename)
            if os.path.isfile(self.savePath):
                # The target file already exists: append this tweet to it.
                self.append_to_file(item, self.savePath)
            else:
                self.save_to_file(item, self.savePath)
                logger.debug("Add tweet:%s", item['url'])

        elif isinstance(item, User):
            self.savePath = os.path.join(self.saveUserPath, item['ID'])
            # Existing user files are deliberately skipped, not rewritten.
            if not os.path.isfile(self.savePath):
                self.save_to_file(item, self.savePath)
                logger.debug("Add user:%s", item['screen_name'])

        else:
            logger.info("Item type is not recognized! type = %s", type(item))

        return item
Beispiel #4
0
    def __init__(self):
        """Reset the file handles and prepare the tweet/user output folders."""
        # No output file is open yet.
        self.tweets_file = None
        self.users_file = None

        tweet_dir = settings['SAVE_TWEET_PATH']
        self.saveTweetPath = tweet_dir
        user_dir = settings['SAVE_USER_PATH']
        self.saveUserPath = user_dir

        # Make sure both target directories exist.
        mkdirs(tweet_dir)
        mkdirs(user_dir)
Beispiel #5
0
    def scrap_following(self):
        """Scrape the "following" page of ``self.query`` and record the handles.

        Opens ``https://twitter.com/<query without its first character>/following``
        in a PhantomJS browser, submits the login form, scrolls until the page
        height stops growing and then splits the visible body text into
        whitespace-separated tokens.

        Every token that starts with ``@``, is longer than one character and
        differs from ``self.query`` is appended to ``following.txt``;
        ``self.query`` itself is appended to ``scrapped_users.txt``. Both files
        live in the ``SAVE_USER_FOLLOWERS_PATH`` directory from settings, which
        is also stored in ``self.saveUserPath``.

        The browser is always shut down, even when a page element is missing
        or navigation fails, so no PhantomJS process is left behind.
        """
        # NOTE(review): the executable path is machine-specific; consider
        # moving it into settings.
        browser = webdriver.PhantomJS(
            executable_path=
            'C:/Users/ShuaibReeyaz/Downloads/phantomjs-2.1.1-windows/bin/phantomjs'
        )
        try:
            browser.get("https://twitter.com/" + self.query[1:] + "/following")

            # Give the login page time to render.
            time.sleep(2)

            # NOTE(review): credentials are hard-coded in source; they should
            # be loaded from configuration or the environment instead.
            username = browser.find_element_by_css_selector(
                '.js-username-field.email-input.js-initial-focus')
            username.send_keys('*****@*****.**')

            password = browser.find_element_by_css_selector('.js-password-field')
            password.send_keys('ilias2019!')

            # NOTE(review): 'EdgeButtom--medium' looks like a typo for
            # 'EdgeButton--medium' - confirm against the page markup.
            form = browser.find_element_by_css_selector(
                '.submit.EdgeButton.EdgeButton--primary.EdgeButtom--medium')
            form.submit()

            SCROLL_PAUSE_TIME = 0.5
            last_height = browser.execute_script(
                "return document.body.scrollHeight")

            # Keep scrolling to the bottom until the height no longer changes.
            while True:
                browser.execute_script(
                    "window.scrollTo(0, document.body.scrollHeight);")

                # Wait for newly loaded content.
                time.sleep(SCROLL_PAUSE_TIME)

                new_height = browser.execute_script(
                    "return document.body.scrollHeight")
                if new_height == last_height:
                    break
                last_height = new_height

            # The text must be read before the browser is closed.
            info = browser.find_element_by_xpath("//body").text.split()
        finally:
            browser.quit()

        self.saveUserPath = settings['SAVE_USER_FOLLOWERS_PATH']
        mkdirs(self.saveUserPath)
        savePath = os.path.join(self.saveUserPath, "following" + ".txt")
        with open(savePath, 'a+') as f:
            for i in info:
                if i.startswith('@') and len(i) > 1 and i != self.query:
                    f.write(i)
                    f.write("\n")
        savePath = os.path.join(self.saveUserPath, "scrapped_users" + ".txt")
        with open(savePath, 'a+') as f:
            f.write(self.query)
            f.write("\n")
Beispiel #6
0
    def check(self, stringToMatch):
        """Return whether ``stringToMatch`` is a line of ``scrapped_users.txt``.

        The file is looked up in the ``SAVE_USER_FOLLOWERS_PATH`` directory
        from settings; the directory and the file are created when missing.

        Args:
            stringToMatch: The entry to look for. A trailing line break, if
                present, is ignored.

        Returns:
            True if any line of the file equals ``stringToMatch`` once line
            breaks are stripped from both, otherwise False.
        """
        saveUserPath = settings['SAVE_USER_FOLLOWERS_PATH']
        mkdirs(saveUserPath)
        savePath = os.path.join(saveUserPath, "scrapped_users" + ".txt")
        # Lines read from a file keep their trailing newline, so both sides
        # are compared without line breaks.
        wanted = stringToMatch.rstrip("\r\n")
        # 'a+' creates the file when it does not exist yet, but leaves the
        # stream positioned at the end; rewind before reading.
        with open(savePath, 'a+') as users_file:
            users_file.seek(0)
            return any(line.rstrip("\r\n") == wanted for line in users_file)
Beispiel #7
0
 def __init__(self):
     """Read the tweet, user and follower output paths and create them."""
     self.saveTweetPath = settings['SAVE_TWEET_PATH']
     self.saveUserPath = settings['SAVE_USER_PATH']
     self.savefollowersPath = settings['SAVE_USER_FOLLOWERS_PATH']

     # Make sure every output directory exists, in the order configured above.
     output_dirs = (
         self.saveTweetPath,
         self.saveUserPath,
         self.savefollowersPath,
     )
     for directory in output_dirs:
         mkdirs(directory)
Beispiel #8
0
 def __init__(self):
     """Read the tweet, user and error output paths and create them."""
     tweet_dir = settings['SAVE_TWEET_PATH']
     self.saveTweetPath = tweet_dir
     user_dir = settings['SAVE_USER_PATH']
     self.saveUserPath = user_dir
     error_dir = settings['SAVE_ERROR_PATH']
     self.saveErrorPath = error_dir

     # All three directories must exist before anything is written.
     mkdirs(tweet_dir)
     mkdirs(user_dir)
     mkdirs(error_dir)
 def __init__(self):
     """Take the tweet and user output paths from SETTINGS and create them."""
     self.saveTweetPath = SETTINGS['SAVE_TWEET_PATH']
     self.saveUserPath = SETTINGS['SAVE_USER_PATH']
     for directory in (self.saveTweetPath, self.saveUserPath):
         # Ensure the path exists.
         mkdirs(directory)
Beispiel #10
0
 def __init__(self):
     """Configure the tweet and user output paths and make sure they exist."""
     tweet_dir = settings['SAVE_TWEET_PATH']
     self.saveTweetPath = tweet_dir
     user_dir = settings['SAVE_USER_PATH']
     self.saveUserPath = user_dir

     # Ensure the paths exist.
     mkdirs(tweet_dir)
     mkdirs(user_dir)
Beispiel #11
0
 def open_spider(self, spider):
     """Set up the output paths from the settings of the given spider.

     The tweet and user paths are read from ``spider.settings`` and both
     directories are passed to ``mkdirs`` so they exist.
     """
     spider_settings = spider.settings
     self.saveTweetPath = spider_settings['SAVE_TWEET_PATH']
     self.saveUserPath = spider_settings['SAVE_USER_PATH']
     for directory in (self.saveTweetPath, self.saveUserPath):
         mkdirs(directory)
    def open_spider(self, spider):
        """Set up per-spider output directories.

        ``spider.id`` followed by ``'/'`` is appended to the tweet and user
        base paths from SETTINGS, and both resulting directories are created.
        """
        self.saveTweetPath = SETTINGS['SAVE_TWEET_PATH'] + spider.id + '/'
        self.saveUserPath = SETTINGS['SAVE_USER_PATH'] + spider.id + '/'

        # Make sure both per-spider directories exist.
        for directory in (self.saveTweetPath, self.saveUserPath):
            mkdirs(directory)
Beispiel #13
0
 def __init__(self):
     """Store the configured tweet and user output paths and create them."""
     self.saveTweetPath = settings['SAVE_TWEET_PATH']
     self.saveUserPath = settings['SAVE_USER_PATH']

     # Both directories have to exist before items can be saved into them.
     output_dirs = (self.saveTweetPath, self.saveUserPath)
     for directory in output_dirs:
         mkdirs(directory)
Beispiel #14
0
 def __init__(self):
     """Store the tweet output path from settings and make sure it exists.

     Only the tweet path is handled here; the user path
     (``settings.SAVE_USER_PATH``) is not set up by this initializer.
     """
     tweet_dir = settings.SAVE_TWEET_PATH
     self.saveTweetPath = tweet_dir
     mkdirs(tweet_dir)
Beispiel #15
0
 def __init__(self):
     """Store the tweet output path, zero the tweet counter, create the path."""
     tweet_dir = settings['SAVE_TWEET_PATH']
     self.saveTweetPath = tweet_dir
     # Counter starts at zero; this method never changes it afterwards.
     self.tweet_counts = 0
     # Ensure the path exists.
     mkdirs(tweet_dir)
Beispiel #16
0
 def __init__(self):
     """Load the tweet and user output paths from Scrapy's settings object.

     The settings object is imported locally inside this method; both
     directories are passed to ``mkdirs`` so they exist.
     """
     # NOTE(review): scrapy.conf is a legacy import location - confirm it is
     # still available in the Scrapy version this project runs on.
     from scrapy.conf import settings

     self.saveTweetPath = settings['SAVE_TWEET_PATH']
     self.saveUserPath = settings['SAVE_USER_PATH']
     for directory in (self.saveTweetPath, self.saveUserPath):
         mkdirs(directory)