def setDateRangeTo(self, daterangeTo=None, printing=False):
    """Set the upper bound of the date range.

    The value is first passed through ``self._convertToValidJulianDate``;
    a result of -1 from that helper means the value is not usable. If a
    lower bound (``self.daterangeFrom``) is already set, the new upper
    bound must not be smaller than it.

    Args:
        daterangeTo: the requested end of the range; ``None`` is rejected.
        printing: forwarded to the conversion helper and to ``print_error``.

    Returns:
        True if ``self.daterangeTo`` and both config entries were updated,
        False otherwise.
    """
    if daterangeTo is None:
        return False

    julianTo = self._convertToValidJulianDate(daterangeTo, printing)
    if julianTo == -1:
        return False

    lowerBound = self.daterangeFrom
    if lowerBound is not None and julianTo < lowerBound:
        print_error(
            printing, self.__class__.__name__,
            sys._getframe().f_code.co_name,
            "daterangeTo cannot be less than daterangeFrom.")
        return False

    self.daterangeTo = julianTo
    ## update config: both spellings of the key are kept in sync
    for configKey in ('daterangeTo', 'daterange_to'):
        self.config[configKey] = julianTo
    return True
    def setDateRangeFrom(self, daterangeFrom=None, printing=False):
        """Set the lower bound of the date range.

        The value is first passed through ``self._convertToValidJulianDate``;
        a result of -1 from that helper means the value is not usable. If an
        upper bound (``self.daterangeTo``) is already set, the new lower
        bound must not be greater than it.

        Args:
            daterangeFrom: the requested start of the range; ``None`` is
                rejected.
            printing: forwarded to the conversion helper and to
                ``print_error``.

        Returns:
            True if ``self.daterangeFrom`` and both config entries were
            updated, False otherwise.
        """
        if daterangeFrom is None:
            return False

        julianFrom = self._convertToValidJulianDate(daterangeFrom, printing)
        if julianFrom == -1:
            return False

        upperBound = self.daterangeTo
        if upperBound is not None and julianFrom > upperBound:
            print_error(
                printing, self.__class__.__name__,
                sys._getframe().f_code.co_name,
                "daterangeFrom cannot be greater than daterangeTo."
            )
            return False

        self.daterangeFrom = julianFrom
        ## update config: both spellings of the key are kept in sync
        for configKey in ('daterangeFrom', 'daterange_from'):
            self.config[configKey] = julianFrom
        return True
Exemple #3
0
	def DBSetup(self, dbTableName, printing):
		"""
		Ensures a table with the required name exists in the database.

		The table is created with CREATE TABLE IF NOT EXISTS, so an already
		existing table of that name is left as it is.

		Args:
		    dbTableName: the table name to be used in the database. It is
		        interpolated directly into the SQL statement.
		    printing: if we should print to terminal or not.

		Returns: True if the statement was executed and committed, False if
		    any exception was raised while doing so.
		"""

		## Assumes self.conn is not None; if it is, the resulting exception is
		## caught below and reported like any other failure.
		try:
			self.conn.execute('''CREATE TABLE IF NOT EXISTS `%s`(
				resultNumberInSearch 	INTEGER,
				SearchEngines			TEXT,
				Topic 					TEXT 	NOT NULL,
				URL 					TEXT 	NOT NULL,
				ResultPageNumber 		INTEGER NOT NULL,
				ResultNumberOnPage		INTEGER NOT NULL,
				StartDate 				INTEGER,
				EndDate 				INTEGER,
				SearchedOnDate 			DATE,
				ObtainedFromQuery 		TEXT 	NOT NULL,
				QueryPageURL			TEXT,
				PRIMARY KEY(SearchEngines, Topic, URL)
			);
			'''%dbTableName)
			self.conn.commit()
			return True

		## "except ... as" is accepted by Python 2.6+ and Python 3, unlike the
		## comma form, which is a SyntaxError on Python 3.
		except Exception as e:
			## %-formatting so a non-string table name cannot raise again here.
			print_error(printing, self.__class__.__name__, sys._getframe().f_code.co_name, "could not create table '%s' in database." % (dbTableName,), e)
			return False
 def goToNextDateRange(self, newRange=None, printing=False):
     """Shift the date range forward so it starts at the current end.

     The new range is ``[daterangeTo, daterangeTo + newRange]`` and is
     applied through ``self.setDateRange``.

     Args:
         newRange: length of the new range; when ``None`` the length of
             the current range (``daterangeTo - daterangeFrom``) is used.
         printing: forwarded to ``print_error`` when the range is unset.

     Returns:
         True once ``self.setDateRange`` has been called, False if either
         end of the current range is not set.
     """
     if self.daterangeFrom is None or self.daterangeTo is None:
         print_error(printing, self.__class__.__name__,
                     sys._getframe().f_code.co_name,
                     "daterangeFrom and daterangeTo are not set.")
         return False

     if newRange is not None:
         span = newRange
     else:
         span = self.daterangeTo - self.daterangeFrom  ## keep the current length
     self.setDateRange(self.daterangeTo, self.daterangeTo + span)
     return True
 def goToPreviousDateRange(self, newRange=None, printing=False):
     """Shift the date range backward so it ends at the current start.

     The new range is ``[daterangeFrom - newRange, daterangeFrom]`` and is
     applied through ``self.setDateRange``.

     Args:
         newRange: length of the new range; when ``None`` the length of
             the current range (``daterangeTo - daterangeFrom``) is used.
         printing: forwarded to ``print_error`` when the range is unset.

     Returns:
         True once ``self.setDateRange`` has been called, False if either
         end of the current range is not set.
     """
     if self.daterangeFrom is None or self.daterangeTo is None:
         print_error(printing, self.__class__.__name__,
                     sys._getframe().f_code.co_name,
                     "daterangeFrom and daterangeTo are not set.")
         return False

     if newRange is not None:
         span = newRange
     else:
         span = self.daterangeTo - self.daterangeFrom  ## keep the current length
     self.setDateRange(self.daterangeFrom - span, self.daterangeFrom)
     return True
 def setInTitle(self, intitle=None, printing=False):
     """Store the title filter after normalising it.

     Surrounding whitespace is stripped. A value that still contains a
     newline is rejected; a value containing a space is wrapped in double
     quotes before being stored.

     Args:
         intitle: the title text; ``None`` is rejected.
         printing: forwarded to ``print_error`` on rejection.

     Returns:
         True if ``self.intitle`` and ``self.config['intitle']`` were
         updated, False otherwise.
     """
     if intitle is None:
         return False

     cleaned = intitle.strip()
     if "\n" in cleaned:
         print_error(
             printing, self.__class__.__name__,
             sys._getframe().f_code.co_name,
             "the title cannot have newlines in the word, only spaces, hyphens, underscores and periods."
         )
         return False

     if " " in cleaned:  ## multiple words: surround them with quotes
         cleaned = '"%s"' % cleaned
     self.intitle = cleaned
     ## update config
     self.config['intitle'] = cleaned
     return True
 def setFuzzyTopicsList(self, fuzzyTopicsList=None, printing=False):
     """Store the list of fuzzy topics after validating it.

     The collection must be non-empty and every item must be a string
     (string subclasses are accepted).

     Args:
         fuzzyTopicsList: the topics to store; ``None`` is rejected
             without an error message.
         printing: forwarded to ``print_error`` on rejection.

     Returns:
         True if ``self.fuzzyTopicsList`` and both config entries were
         updated, False otherwise.
     """
     if fuzzyTopicsList is None:
         return False

     ## Truthiness rather than "!= []" so that other empty containers,
     ## e.g. an empty tuple, are rejected as well.
     if not fuzzyTopicsList:
         print_error(printing, self.__class__.__name__,
                     sys._getframe().f_code.co_name,
                     "the list cannot be empty.")
         return False

     ## Same base type as the string literal used by the original check,
     ## but tested with isinstance so subclasses are accepted.
     stringType = type("")
     if not all(isinstance(topic, stringType) for topic in fuzzyTopicsList):
         print_error(printing, self.__class__.__name__,
                     sys._getframe().f_code.co_name,
                     "the list cannot contain non-strings.")
         return False

     self.fuzzyTopicsList = fuzzyTopicsList
     ## update config: both spellings of the key are kept in sync
     self.config['fuzzyTopicsList'] = fuzzyTopicsList
     self.config['fuzzy_topics_list'] = fuzzyTopicsList
     return True
    def setSiteList(self, siteList=None, printing=False):
        """Store the list of websites after validating it.

        The collection must be non-empty, every item must be a string
        (string subclasses are accepted), and no item may contain a space.

        Args:
            siteList: the websites to store; ``None`` is rejected without
                an error message.
            printing: forwarded to ``print_error`` on rejection.

        Returns:
            True if ``self.siteList`` and both config entries were updated,
            False otherwise.
        """
        if siteList is None:
            return False

        ## Truthiness rather than "!= []" so that other empty containers,
        ## e.g. an empty tuple, are rejected as well.
        if not siteList:
            print_error(printing, self.__class__.__name__,
                        sys._getframe().f_code.co_name,
                        "the list cannot be empty.")
            return False

        ## Same base type as the string literal used by the original check,
        ## but tested with isinstance so subclasses are accepted.
        stringType = type("")
        if not all(isinstance(site, stringType) for site in siteList):
            print_error(printing, self.__class__.__name__,
                        sys._getframe().f_code.co_name,
                        "the site list cannot contain non-strings.")
            return False

        if any(" " in site for site in siteList):
            print_error(
                printing, self.__class__.__name__,
                sys._getframe().f_code.co_name,
                "websites in the site list cannot contain a space in their url."
            )
            return False

        self.siteList = siteList
        ## update config: both spellings of the key are kept in sync
        self.config['siteList'] = siteList
        self.config['site_list'] = siteList
        return True
    def _convertToValidJulianDate(self, daterangeDate, printing=False):
        """This function converts daterange values to the appropriate Julian date integer.
		The dateranges are allowed to be entered as datetime.datetime objects, datetime.date objects, or integers which are assumed to be the julian date (cannot be smaller than start of UNIX time i.e. 1 Jan 1970)."""

        if daterangeDate is not None:

            if type(daterangeDate) == type(datetime.datetime.now().date(
            )) or type(daterangeDate) == type(datetime.datetime.now(
            )):  ## works on both datetime.datetime and datetime.date objects.
                return self._toJulianDateDatetime(daterangeDate)

            elif type(daterangeDate) == type(
                    0):  ## if it is an integer, assumed to be julian date.
                if daterangeDate >= 2440588:  ## start of UNIX time, i.e. 1 Jan 1970. Not 4 Sept 1998 (i.e. date of founding of Google as a company) because Google has pages from before it was created.
                    return daterangeDate
                else:
                    print_error(
                        printing, self.__class__.__name__,
                        sys._getframe().f_code.co_name,
                        "daterangeDate has invalid value of %s, must not be before start of UNIX time i.e. 1 Jan 1970."
                        % (daterangeDate))

            else:
                print_error(
                    printing, self.__class__.__name__,
                    sys._getframe().f_code.co_name,
                    "daterangeDate has invalid value of '%s'. Should be a Julian date integer or a datetime object."
                    % (daterangeDate))

        else:
            print_error(printing, self.__class__.__name__,
                        sys._getframe().f_code.co_name,
                        "daterangeDate not set.")
        return -1
Exemple #10
0
	def connectToSQLiteDB(self, dbFilePath="GoogleSearchResults.db", dbTableName="SearchResultURLs", printing=True):
		"""
		Args:
		    dbFilePath: the file path of the SQLite database file. If not a .db file, it is corrected.
		    	e.g. "xxx/xxx/xxx.db" stays the same, whereas "xxx/xxx/xxx" becomes the former, and "xxx/xxx/" (i.e. a directory) bedomes "xxx/xxx/GoogleSearchResults.db"
		    dbTableName: the SQLite table name to be referred to henceforth.
		    printing: if we should print to terminal or not.

		Returns: True or False, depending on whether we have successfully connected to SQLite and created a usable table, or not.
		"""

		## Correct common errors:
		if dbFilePath.endswith("/") or dbFilePath.endswith("\\"):
			dbFilePath+="GoogleSearchResults.db"
		if not dbFilePath.endswith(".db"):
			dbFilePath+=".db"

		try:
			self.conn=sqliteDefaults.get_conn(dbFilePath, printing)
		except Exception, e:
			print_error(printing, self.__class__.__name__, sys._getframe().f_code.co_name, "could not connect to SQLite database.", e)
			self.conn = None
			return False