Beispiel #1
0
    def create_directory(self, dir):
        '''
        Create the temporary or the data directory below
        ``self.conversion_root``.

        ``dir`` selects the target: 'temp' uses ``self.temp_path`` and sets
        ``self.temp_dir_exists``; 'data' uses ``self.data_path`` and sets
        ``self.data_dir_exists``. Any other value is ignored.

        The flag is set to True whenever the directory is present afterwards,
        whether it was created here or already existed.
        '''

        if dir == 'temp':
            sub_path, flag_name = self.temp_path, 'temp_dir_exists'
        elif dir == 'data':
            sub_path, flag_name = self.data_path, 'data_dir_exists'
        else:
            return

        path = f'{self.conversion_root}/{sub_path}'

        try:
            os.mkdir(path)

        except FileExistsError:
            # os.mkdir raises this for any existing entry, so only flag the
            # directory as present when the entry really is a directory.
            if os.path.isdir(path):
                setattr(self, flag_name, True)

            log_update(f'{dir.capitalize()} directory exists')

        else:
            setattr(self, flag_name, True)
            log_update(f'{dir.capitalize()} directory created')
Beispiel #2
0
    def check_posts_data(self):
        '''
        Check whether the posts data file exists and fetch the posts when it
        is missing or when ``self.update_data`` asks for a refresh.

        When the file exists it is parsed and the posts endpoint
        (``self.fetch_posts_link + self.url_args``) is requested, so a
        malformed file or an unreachable endpoint raises here.
        '''
        import ssl  # local import: only needed to build the TLS context

        posts_file = self.get_file_path('posts')
        file_exists = os.path.exists(posts_file)

        if file_exists:
            # urlopen() no longer accepts ``cafile`` (removed in Python 3.13);
            # an SSLContext carrying the certifi bundle is the supported form.
            tls_context = ssl.create_default_context(cafile=certifi.where())

            # NOTE(review): neither the parsed file content nor the response
            # is used afterwards. Both operations are kept so their errors
            # still surface -- confirm whether an up-to-date comparison is
            # missing here.
            with open(posts_file, 'r') as json_posts, \
              urllib.request.urlopen(
                self.fetch_posts_link + self.url_args,
                context=tls_context):
                ast.literal_eval(json_posts.read())

        if not file_exists or self.update_data:
            self.temp_post_data = []
            self.fetch_posts()

        else:
            # Only reachable when the file exists.
            log_update('Loading "_data/data-posts.json"')
Beispiel #3
0
  def check_subscriber_data(self):
    '''
    Convert the subscriber CSV at ``self.csv`` when it is present,
    otherwise log that the file is missing.
    '''

    csv_present = os.path.exists(self.csv)

    # Guard clause: nothing to convert without the input file.
    if not csv_present:
      log_update('Missing CSV file')
      return None

    self.convert_subscriber_csv()
Beispiel #4
0
    def check_conversion_data(self):
        '''
        Ensure the conversion JSON file exists.

        When the file returned by ``get_file_path('conversion')`` is absent
        it is generated through ``convert_conversion_csv``; otherwise only a
        "Loading" message is logged.
        '''

        json_path = self.get_file_path('conversion')

        # Missing file: build it and stop.
        if not os.path.exists(json_path):
            self.convert_conversion_csv()
            return

        log_update(f'Loading "{self.conversion_file_json}"')
Beispiel #5
0
    def check_score_data(self):
        '''
        Ensure the score JSON file exists.

        When the file returned by ``get_file_path('score')`` is absent it is
        generated through ``convert_score_csv``; otherwise only a "Loading"
        message is logged.
        '''

        json_path = self.get_file_path('score')

        # Missing file: build it and stop.
        if not os.path.exists(json_path):
            self.convert_score_csv()
            return

        log_update(f'Loading "{self.score_file_json}"')
Beispiel #6
0
    def convert_score_csv(self):
        '''
        Read the "scores" worksheet of the configured Google Sheet and write
        it as a JSON object to the file returned by
        ``get_file_path('score')``.

        Each row maps its first cell to the integer value of its second cell.
        '''

        log_update('Converting scores from Google Sheet to JSON')

        spreadsheet = self.google_client.open_by_key(self.google_sheet_key)
        sheet_rows = spreadsheet.worksheet('scores').get_all_values()

        # Discard the first row (presumably the column headers).
        sheet_rows.pop(0)

        scores = {row[0]: int(row[1]) for row in sheet_rows}

        with open(self.get_file_path('score'), 'w') as json_file:
            serialized = json.dumps(scores)
            json_file.write(serialized)
Beispiel #7
0
    def check_posts_data(self):
        '''
        Check whether the posts data file exists and is up to date, and
        fetch the posts when it is not.

        When the file exists, its number of entries is compared with the
        ``X-Wp-Total`` header returned by ``self.fetch_posts_link``. On a
        match the parsed content is stored in ``self.temp_post_data``.
        Posts are fetched again when the file is missing, or when the counts
        differ (or cannot be compared) and ``self.update_data`` is set.
        '''
        import ssl  # local import: only needed to build the TLS context

        posts_file = self.get_file_path('posts')
        file_exists = os.path.exists(posts_file)
        recompile_posts = True

        if file_exists:
            # urlopen() no longer accepts ``cafile`` (removed in Python 3.13);
            # an SSLContext carrying the certifi bundle is the supported form.
            tls_context = ssl.create_default_context(cafile=certifi.where())

            with open(posts_file, 'r') as json_posts, \
              urllib.request.urlopen(self.fetch_posts_link,
                                     context=tls_context) as res:
                temp_post_data = ast.literal_eval(json_posts.read())
                remote_total = res.info().get('X-Wp-Total')

            # Without the header the stored data cannot be confirmed as
            # current, so it is treated as needing a recompile.
            if remote_total is not None and \
              int(remote_total) == len(temp_post_data):
                log_update('Posts have been compiled and are up to date')

                self.temp_post_data = temp_post_data
                recompile_posts = False

        if not file_exists or \
          (recompile_posts and self.update_data):
            self.temp_post_data = []
            self.fetch_posts()

        else:
            # Only reachable when the file exists.
            log_update('Loading "_data/data-posts.json"')
Beispiel #8
0
    def check_progress_data(self, reset_progress):
        '''
        Ensure the progress file exists.

        A missing file is created containing an empty JSON list. An existing
        file is left untouched unless ``reset_progress`` is truthy, in which
        case it is removed and recreated empty.
        '''

        target = self.get_file_path('progress')
        already_there = os.path.exists(target)

        # Existing file that should be kept: just report it.
        if already_there and not reset_progress:
            log_update(f'Loading "{self.progress_file_json}"')
            return

        if already_there:
            log_update(f'Resetting "{self.progress_file_json}"')
            os.remove(target)

        empty_progress = []
        with open(target, 'w') as outfile:
            outfile.write(json.dumps(empty_progress))
Beispiel #9
0
  def remove_directory(self, dir):
    '''
    Delete the temporary directory and clear ``self.temp_dir_exists``.

    Only ``dir == 'temp'`` is handled; any other value, or a path that
    does not exist, leaves everything untouched. A failure while
    removing the tree is logged instead of raised.
    '''

    if dir != 'temp':
      return

    path = self.get_file_path('temp')
    if not path or not os.path.exists(path):
      return

    try:
      shutil.rmtree(path)

    except OSError as e:
      # The message previously said "creating", which was misleading
      # for a failed removal.
      log_update(f'Error removing temp directory: {e.filename} - {e.strerror}')
      return

    self.temp_dir_exists = False
    log_update(f'{dir.capitalize()} directory removed')
Beispiel #10
0
    def convert_conversion_csv(self):
        '''
        Read the "conversions" worksheet of the configured Google Sheet and
        write it as a JSON object to the file returned by
        ``get_file_path('conversion')``.

        Each row is keyed by its first cell. Cells two and three are stored
        as "conversion1" / "conversion2"; cells four and five become the
        booleans "backend_tag" / "franchise_tag" (True only for the exact
        text 'TRUE').
        '''

        log_update('Converting conversions from Google Sheet to JSON')

        spreadsheet = self.google_client.open_by_key(self.google_sheet_key)
        sheet_rows = spreadsheet.worksheet('conversions').get_all_values()

        # Discard the first row (presumably the column headers).
        sheet_rows.pop(0)

        conversions = {
            row[0]: {
                "conversion1": row[1],
                "conversion2": row[2],
                "backend_tag": row[3] == 'TRUE',
                "franchise_tag": row[4] == 'TRUE',
            }
            for row in sheet_rows
        }

        with open(self.get_file_path('conversion'), 'w') as json_file:
            serialized = json.dumps(conversions)
            json_file.write(serialized)
Beispiel #11
0
  def convert_subscriber_csv(self):
    '''
    Convert the subscriber CSV at ``self.csv`` into a JSON list written
    to the file returned by ``get_file_path('subscribers')``.

    Every row is wrapped in a ``Subscriber``; rows whose ``is_valid`` is
    falsy are skipped. Entries are keyed by email while reading: the
    entry already compiled for the same email (or None) is handed to
    ``compile_data`` and replaced by its result.
    '''

    log_update('Converting CSV to JSON')

    compiled = {}

    # newline='' lets the csv module do its own line-ending handling, as
    # its documentation requires; otherwise newlines embedded in quoted
    # fields are not interpreted correctly.
    with open(self.csv, newline='') as csv_file:
      for row in csv.DictReader(csv_file):
        subscriber = Subscriber(row)
        if not subscriber.is_valid:
          continue

        priors = compiled.get(subscriber.email)
        compiled[subscriber.email] = subscriber.compile_data(priors)

    with open(self.get_file_path('subscribers'), 'w') as json_file:
      json_file.write(json.dumps(list(compiled.values())))
Beispiel #12
0
    def run(self):
        '''
        Send a dev-tag request for every post of the configured legacy
        category.

        Posts whose id is already in ``self.progress`` are skipped. Posts
        whose ``legacy_category`` differs from ``self.legacy_category`` are
        recorded as errors and skipped. For every remaining post a JSON POST
        is sent to ``{self.target}/{self.post_request_path}``. Failures,
        including HTTP error statuses, are collected and printed at the end
        instead of aborting the run. The temp directory is removed afterwards
        when ``self.data.temp_dir_exists`` is set.
        '''

        self.data.check_input_files()

        ## Open data files
        self.list_posts = self.data.open_data_files('posts')

        ## Loop through all posts
        total_posts = len(self.list_posts)

        log_update(
            f'Starting conversion at {datetime.now().strftime("%H:%M:%S")}')

        errored_posts = []
        for index, post in enumerate(self.list_posts, start=1):
            post_id = int(post['id'])
            target = post['legacy_category']

            if post_id in self.progress:
                continue

            if target != self.legacy_category:
                errored_posts.append({
                    'ID': post_id,
                    'Reason': 'Legacy category doesn\'t match'
                })
                continue

            # (reason label, exception) when the request failed
            failure = None
            try:
                response = requests.post(
                    f'{self.target}/{self.post_request_path}',
                    json={
                        'post': post_id,
                        'dev_tag': f'legacy-category-{target}'
                    },
                    headers={'Content-Type': 'application/json'})

                # requests only raises HTTPError on request, so without this
                # call 4xx/5xx responses would silently count as successes
                # and the HTTPError handler below could never run.
                response.raise_for_status()

            except (requests.exceptions.ConnectionError,
                    requests.exceptions.Timeout) as e:
                failure = (
                    'requests.exceptions.ConnectionError, requests.exceptions.Timeout',
                    e)

            except (ValueError, requests.exceptions.HTTPError,
                    urllib.error.URLError) as e:
                failure = (
                    'ValueError, requests.exceptions.HTTPError, urllib.error.URLError',
                    e)

            except Exception as e:
                failure = ('Exception', e)

            if failure is not None:
                reason, error = failure
                errored_posts.append({'ID': post_id, 'Reason': reason})
                print(reason)
                print(error)

            log_progress_bar(
                index, total_posts,
                f'Updating post {post_id}: {index} of {total_posts}',
                'All posts updated')

            # NOTE(review): progress is recorded even when the request
            # failed, so such posts are presumably skipped on a later run --
            # confirm this is intended.
            self.update_progress_file(post_id)

        log_update(
            f'Conversion finished at {datetime.now().strftime("%H:%M:%S")}')

        if errored_posts:
            print('Errors occrued on posts:')
            for post in errored_posts:
                print(post)

        if self.data.temp_dir_exists:
            self.data.remove_directory('temp')
    def run(self):
        '''
        Push the category assignment of every post in ``self.posts`` to the
        WordPress REST API of ``self.target``.

        Each post is a sequence of (post id, hero tag, tag 1, tag 2, legacy
        category, backend tags). Posts whose id is in ``self.progress`` are
        skipped. The category list sent for a post consists of its non-empty
        tags, every ancestor of the hero tag found through
        ``self.list_categories``, and every category whose slug appears in
        the '|'-separated legacy category. Request failures, including HTTP
        error statuses, are printed and do not abort the run.
        '''

        log_update(
            f'Starting conversion at {datetime.now().strftime("%H:%M:%S")}')

        for index, post in enumerate(self.posts, start=1):
            post_id = post[0]
            hero_tag = post[1]
            tag_1 = post[2]
            tag_2 = post[3]
            legacy_category = post[4]
            backend_tags = post[5]

            if post_id in self.progress:
                continue

            categories = [
                int(tag) for tag in (hero_tag, tag_1, tag_2) if tag
            ]

            # Walk up the hero tag's ancestry; a parent of 0 marks the root.
            hero_parent = self.list_categories[str(
                hero_tag)]['parent'] if hero_tag else 0
            while hero_parent != 0:
                categories.append(hero_parent)
                hero_parent = self.list_categories[str(hero_parent)]['parent']

            # Split once per post instead of once per category.
            legacy_slugs = legacy_category.split('|')
            for cat_id in self.list_categories:
                if self.list_categories[cat_id]['slug'] in legacy_slugs:
                    categories.append(int(cat_id))

            # (reason label, exception) when the request failed
            failure = None
            try:
                data = {
                    'categories': categories,
                    'hero_tag': int(hero_tag) if hero_tag else '',
                    'tag_1': int(tag_1) if tag_1 else '',
                    'tag_2': int(tag_2) if tag_2 else '',
                    'backend_tag': backend_tags,
                    'legacy_category': legacy_category
                }

                # Kept inside the try so a missing ENCODED_LOGIN is reported
                # per post, as before, instead of aborting the run.
                headers = {
                    'Content-Type': 'application/json',
                    'Authorization': os.environ['ENCODED_LOGIN']
                }

                response = requests.post(
                    f'{self.target}/wp-json/wp/v2/posts/{post_id}?skip_apple_news=true',
                    json=data,
                    headers=headers)

                # requests only raises HTTPError on request, so without this
                # call 4xx/5xx responses would silently count as successes
                # and the HTTPError handler below could never run.
                response.raise_for_status()

            except (requests.exceptions.ConnectionError,
                    requests.exceptions.Timeout) as e:
                failure = (
                    "requests.exceptions.ConnectionError, requests.exceptions.Timeout",
                    e)

            except (ValueError, requests.exceptions.HTTPError,
                    urllib.error.URLError) as e:
                failure = (
                    "ValueError, requests.exceptions.HTTPError, urllib.error.URLError",
                    e)

            except Exception as e:
                failure = ("Exception", e)

            if failure is not None:
                reason, error = failure
                print("******************")
                print(reason)
                print(error)
                # f-string instead of "+" so a non-string id cannot raise a
                # TypeError while the failure is being reported.
                print(f"Errored post: {post_id}")

            log_progress_bar(
                index, self.total_post_updates,
                f'Updating post {post_id}: {index} of {self.total_post_updates}',
                'All posts updated')

            # NOTE(review): progress is recorded even when the request
            # failed, so such posts are presumably skipped on a later run --
            # confirm this is intended.
            self.update_progress_file(post_id)

        log_update(
            f'Conversion finished at {datetime.now().strftime("%H:%M:%S")}')
Beispiel #14
0
  def run(self):
    '''
    Send a transfer-to-backend-tags request for every post in the posts
    data file.

    Posts whose id is already in ``self.progress`` are skipped. For every
    other post a JSON POST carrying the post id and its
    ``target_backend_tag`` is sent to ``self.target``. Failures, including
    HTTP error statuses, are collected and printed at the end instead of
    aborting the run. The temp directory is removed afterwards when
    ``self.data.temp_dir_exists`` is set.
    '''

    self.data.check_input_files()

    ## Open data files
    self.list_posts = self.data.open_data_files('posts')

    ## Loop through all posts
    total_posts = len(self.list_posts)

    log_update(f'Starting conversion at {datetime.now().strftime("%H:%M:%S")}')

    errored_posts = []
    for index, post in enumerate(self.list_posts, start=1):
      post_id = int(post['id'])
      target = int(post['target_backend_tag'])

      if post_id in self.progress:
        continue

      # (reason label, exception) when the request failed
      failure = None
      try:
        response = requests.post(
          f'{self.target}/wp-json/wellandgood/v1/transfer-to-backend-tags',
          json={'post': post_id, 'tag': target},
          headers={'Content-Type': 'application/json'})

        # requests only raises HTTPError on request, so without this call
        # 4xx/5xx responses would silently count as successes and the
        # HTTPError handler below could never run.
        response.raise_for_status()

      except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e:
        failure = ("requests.exceptions.ConnectionError, requests.exceptions.Timeout", e)

      except (ValueError, requests.exceptions.HTTPError, urllib.error.URLError) as e:
        failure = ("ValueError, requests.exceptions.HTTPError, urllib.error.URLError", e)

      except Exception as e:
        failure = ("Exception", e)

      if failure is not None:
        reason, error = failure
        errored_posts.append(post_id)
        print(reason)
        print(error)

      log_progress_bar(
        index,
        total_posts,
        f'Updating post {post_id}: {index} of {total_posts}',
        'All posts updated')

      # NOTE(review): progress is recorded even when the request failed,
      # so such posts are presumably skipped on a later run -- confirm
      # this is intended.
      self.update_progress_file(post_id)

    log_update(f'Conversion finished at {datetime.now().strftime("%H:%M:%S")}')

    if errored_posts:
      print('Errors occrued on posts:')
      for post in errored_posts:
        print(post)

    if self.data.temp_dir_exists:
      self.data.remove_directory('temp')