예제 #1
0
파일: gitsetup.py 프로젝트: mhl/gib
 def abort_if_not_initialized(self):
     '''Return normally when self.git_initialized() is true; otherwise
     explain the problem on stderr and exit with
     Errors.REPOSITORY_NOT_INITIALIZED.'''
     if self.git_initialized():
         return
     # First say what is wrong, then how to fix it:
     print_stderr(
         "You don't seem to have initialized {} for backup.".format(
             self.directory_to_backup
         )
     )
     print_stderr(
         "Please use '{} init' to initialize it".format(self.get_invocation())
     )
     sys.exit(Errors.REPOSITORY_NOT_INITIALIZED)
예제 #2
0
파일: gitsetup.py 프로젝트: mhl/gib
 def switch_to_correct_branch(self):
     '''Point HEAD at 'self.branch', abort unless HEAD then exists, and
     reset the index to match the new HEAD.'''
     self.set_HEAD_to(self.branch)
     self.abort_unless_HEAD_exists()
     # The index has to be reset too: when switching from backing up
     # one computer to another it would otherwise still be the first
     # computer's index, and things go horribly wrong.
     print_stderr(
         "Now working on a new branch, so resetting the index to match..."
     )
     read_tree_command = self.git(["read-tree", "HEAD"])
     check_call(read_tree_command)
예제 #3
0
파일: gitsetup.py 프로젝트: mhl/gib
    def print_settings(self):
        '''Print (via print_stderr) the directory, branch and git
        repository used for this backup, each with a description of
        where the setting came from.'''
        source_names = OptionFrom.string_versions
        template = (
            'Settings for backup:\n'
            'backing up the directory {} (set from the {})\n'
            '... to the branch "{}" (set from the {})\n'
            '... in the git repository {} (set from the {})'
        )
        settings_report = template.format(
            self.directory_to_backup,
            source_names[self.directory_to_backup_from],
            self.branch,
            source_names[self.branch_from],
            self.git_directory,
            source_names[self.git_directory_from],
        )
        print_stderr(settings_report)
예제 #4
0
파일: gitsetup.py 프로젝트: mhl/gib
 def currently_on_correct_branch(self):
     '''Report whether HEAD currently points to 'self.branch'.

     Runs the command built by self.git(["symbolic-ref", "HEAD"]) and
     returns True when its (stripped) output is either 'self.branch'
     or "refs/heads/" followed by 'self.branch'; returns False
     otherwise.  Exits with Errors.FINDING_HEAD if the command returns
     a non-zero status.'''
     process = Popen(self.git(["symbolic-ref", "HEAD"]), stdout=PIPE)
     stdout_data, _ = process.communicate()
     if process.returncode != 0:
         print_stderr("Finding what HEAD points to failed")
         sys.exit(Errors.FINDING_HEAD)
     head_target = stdout_data.decode().strip()
     # Accept the bare branch name as well as the full ref name:
     return (
         self.branch == head_target
         or ("refs/heads/" + self.branch) == head_target
     )
예제 #5
0
파일: gitsetup.py 프로젝트: mhl/gib
 def abort_unless_particular_config(self, key, required_value):
     '''Ensure the git config option "key" holds "required_value".

     When self.config_value(key) returns nothing truthy, the option is
     set to "required_value".  When it returns a different value, exit
     with Errors.GIT_CONFIG_ERROR.  When it already matches, do
     nothing.'''
     existing_value = self.config_value(key)
     if not existing_value:
         # Nothing configured yet, so establish the required value.
         print_stderr(
             "The {} config option was not set, setting to {}".format(
                 key, required_value
             )
         )
         self.set_config_value(key, required_value)
         return
     if existing_value != required_value:
         complaint = "The current value for {} is {}, should be: {}"
         print_stderr(complaint.format(key, existing_value, required_value))
         sys.exit(Errors.GIT_CONFIG_ERROR)
    def fetch_url(self, url, force=False, cookie_jar=None):
        '''Return the text for "url", using the cache when permitted.

        When "force" is False (the default) and self.data_cacher has a
        copy of "url" (checked against self.cache_expiry_time), that
        copy is returned.  Otherwise the URL is requested with
        "cookie_jar" as its cookies and the response text is passed
        through self.replace_fancy_chars.  The result is saved in the
        cache only when "force" is False, then returned.'''
        use_cache = force is False

        if use_cache and self.data_cacher.has_cached(url, self.cache_expiry_time):
            cached_path = self.data_cacher.get_cache_file_path(url)
            print_stderr('Using cached version of "' + url + '"')
            print_stderr('    (' + cached_path + ')')
            return self.data_cacher.load(url)

        print_stderr('Requesting data from "' + url + '"')
        page_text = self.replace_fancy_chars(
            requests.get(url, cookies=cookie_jar).text
        )

        if use_cache:
            self.data_cacher.save(url, page_text)
        return page_text
예제 #7
0
파일: gitsetup.py 프로젝트: mhl/gib
    def abort_unless_HEAD_exists(self):
        '''Return normally when self.check_ref("HEAD") is true; otherwise
        print a hint to stderr and exit with Errors.NO_SUCH_BRANCH.'''
        if self.check_ref("HEAD"):
            return
        hint = (
            "The branch you are trying to back up to does not exist.\n"
            "(Perhaps you haven't run \"{} init\")"
        )
        print_stderr(hint.format(self.get_invocation()))
        sys.exit(Errors.NO_SUCH_BRANCH)
예제 #8
0
파일: gitsetup.py 프로젝트: mhl/gib
    def __init__(self, command_line_options):
        '''Work out the directory to back up, the git directory and the
        branch, recording where each of those settings came from.

        "command_line_options" must provide the attributes "directory",
        "git_directory" and "branch"; a truthy value there always wins.
        Otherwise the git directory and branch are looked up in the
        'repository' section of the '.gib.conf' file inside the
        directory being backed up, and failing that defaults are used:
        the HOME environment variable for the directory, '.git' inside
        that directory for the git directory, and 'master' for the
        branch.  Each "*_from" attribute is set to the OptionFrom value
        describing which source was used.

        The process exits with the matching Errors code if HOME is
        needed but not set, if exists_and_is_directory() rejects the
        directory to back up, or if the git directory is not absolute,
        does not end in '.git' or does not exist.'''

        self.configuration_file = '.gib.conf'

        self.directory_to_backup = None
        self.directory_to_backup_from = None

        self.git_directory = None
        self.git_directory_from = None

        self.branch = None
        self.branch_from = None

        # First the directory to back up:

        if command_line_options.directory:
            self.directory_to_backup = command_line_options.directory
            self.directory_to_backup_from = OptionFrom.COMMAND_LINE
        else:
            if 'HOME' not in os.environ:
                # Then we can't use HOME as default directory:
                print_stderr("The HOME environment variable was not set")
                sys.exit(Errors.STRANGE_ENVIRONMENT)
            self.directory_to_backup = os.environ['HOME']
            self.directory_to_backup_from = OptionFrom.DEFAULT_VALUE

        # We need to make sure that this is an absolute path before
        # changing directory:

        self.directory_to_backup = os.path.abspath(self.directory_to_backup)

        if not exists_and_is_directory(self.directory_to_backup):
            sys.exit(Errors.DIRECTORY_TO_BACKUP_MISSING)

        # Now we know the directory that we're backing up, try to load the
        # config file (RawConfigParser.read ignores a missing file):

        configuration = RawConfigParser()
        configuration.read(os.path.join(self.directory_to_backup,
                                        self.configuration_file))

        # Now set the git directory:

        if command_line_options.git_directory:
            self.git_directory = command_line_options.git_directory
            self.git_directory_from = OptionFrom.COMMAND_LINE
        elif configuration.has_option('repository', 'git_directory'):
            self.git_directory = configuration.get(
                'repository', 'git_directory'
            )
            self.git_directory_from = OptionFrom.CONFIGURATION_FILE
        else:
            self.git_directory = os.path.join(self.directory_to_backup, '.git')
            self.git_directory_from = OptionFrom.DEFAULT_VALUE

        if not os.path.isabs(self.git_directory):
            print_stderr("The git directory must be an absolute path.")
            sys.exit(Errors.GIT_DIRECTORY_RELATIVE)

        # And finally the branch:

        if command_line_options.branch:
            self.branch = command_line_options.branch
            self.branch_from = OptionFrom.COMMAND_LINE
        elif configuration.has_option('repository', 'branch'):
            self.branch = configuration.get('repository', 'branch')
            self.branch_from = OptionFrom.CONFIGURATION_FILE
        else:
            self.branch = 'master'
            self.branch_from = OptionFrom.DEFAULT_VALUE

        # Check that the git_directory ends in '.git' (optionally followed
        # by trailing slashes).  The pattern is a raw string so that the
        # backslash is not treated as an invalid string escape sequence.

        if not re.search(r'\.git/*$', self.git_directory):
            message = "The git directory ({}) did not end in '.git'"
            print_stderr(message.format(self.git_directory))
            sys.exit(Errors.BAD_GIT_DIRECTORY)

        # Also check that it actually exists:

        if not os.path.exists(self.git_directory):
            message = "The git directory '{}' does not exist."
            print_stderr(message.format(self.git_directory))
            sys.exit(Errors.GIT_DIRECTORY_MISSING)
# Record timestamps at various places in the script to provide timing information.
TIMESTAMPS = {}
TIMESTAMPS['Started script'] = time.time()

# Allow config override if a config file path is supplied.  Only settings
# whose key already exists in CONFIG are overridden; other keys are ignored.
if config_file_path is not None:
    # The context manager closes the file even if the JSON fails to parse.
    with open(config_file_path, 'r') as config_file:
        config = json.load(config_file)

    for setting in config:
        if setting in CONFIG:
            CONFIG[setting] = config[setting]

# Read the text of all the stories from standard input.
print_stderr('Fetching stories text...')
stories_text = sys.stdin.read()

# Read in any lists of blacklisted and whitelisted strings.  Each line of
# each file becomes one entry, stripped of surrounding whitespace and
# lower-cased; every file is closed as soon as it has been read.
blacklisted_strings = []
for blacklist_file_path in CONFIG['blacklists']:
    with open(blacklist_file_path, 'r') as blacklist_file:
        blacklisted_strings += [line.strip().lower() for line in blacklist_file]

whitelisted_strings = []
for whitelist_file_path in CONFIG['whitelists']:
    with open(whitelist_file_path, 'r') as whitelist_file:
        whitelisted_strings += [line.strip().lower() for line in whitelist_file]

# For our purposes, we're considering all words in a given blacklisted or whitelisted string to be blacklisted or
# whitelisted too.
# Create a UrlFetcher instance which will do all the fetching for us (using a
# cache to prevent hammering the server).
fetcher = UrlFetcher(CONFIG['paths']['cache_dir'])

if CONFIG['fetch_mode'] == 'stories':
    # If `include_mature` was set, create the `view_mature` cookie to send to
    # Fimfiction.  Without it no cookie jar is passed (None).
    requests_cookie_jar = None
    if 'include_mature' in CONFIG and CONFIG['include_mature'] is True:
        requests_cookie_jar = requests.cookies.RequestsCookieJar()
        requests_cookie_jar.set('view_mature', 'true', domain='www.fimfiction.net', path='/')

    # For all requested authors, crawl Fimfiction to find their stories and
    # obtain a list of story ids.
    for author in authors:
        print_stderr('Grabbing stories for author "{}"...'.format(author))
        # Fetch the profile page for the given user, and from it, obtain the URL
        # of their stories page.
        # NOTE(review): the loop variable is `author`, but `author_username` is
        # used below -- confirm it is defined elsewhere; otherwise this should
        # probably be `author`.
        user_profile_url = CONFIG['base_url']+'/user/'+username_escape(author_username)
        user_profile_html = fetcher.fetch_url(user_profile_url, cookie_jar=requests_cookie_jar)

        # The stories page link is the first <a> inside the element with class
        # `tab-stories`; its href is appended to the base URL.
        soup = BeautifulSoup(user_profile_html, 'html.parser')
        stories_page_link = soup.find(class_='tab-stories').find('a')
        stories_page_url = CONFIG['base_url']+stories_page_link['href']
        stories_page_html = fetcher.fetch_url(stories_page_url, cookie_jar=requests_cookie_jar)

        author_stories = []
        # From the author's stories page, fetch all text download links for all stories on the page.
        while True:
            soup = BeautifulSoup(stories_page_html, 'html.parser')
            # Get all the "chapters footers" (the bar at the bottom of each story card which contains a download
 def fetch_page(self, url):
     '''Announce the request via print_stderr, call self.session.get(url)
     and return the response's text.'''
     print_stderr('Requesting data from {}...'.format(url))
     return self.session.get(url).text