def convert_subscribers(self):
        '''
        Send every subscriber record to the API in batches.

        The subscriber list is loaded with ``self.open_data_files`` and split
        into ``ceil(total / self.api_limit)`` requests, each carrying at most
        ``ceil(total / requests)`` records. Every batch is POSTed to
        ``self.api_url`` as the JSON document ``{"users": [...]}`` using
        ``self.headers``.

        When there are no subscribers, no request is sent (this case used to
        raise ``ZeroDivisionError``). The temporary directory is removed at
        the end whenever ``self.data.temp_dir_exists`` is truthy.
        '''

        ## Check directories and data
        self.data.check_directories()
        self.data.check_subscriber_data()

        ## Open data files
        self.list_subscribers = self.open_data_files('subscribers')

        total_subscribers = len(self.list_subscribers)
        api_calls = math.ceil(total_subscribers / self.api_limit)

        # Spread the records evenly over the calls. With no subscribers
        # api_calls is 0, so the division must be guarded.
        per = math.ceil(total_subscribers / api_calls) if api_calls else 0

        for i in range(api_calls):
            start = per * i
            batch = self.list_subscribers[start:start + per]

            # NOTE(review): the HTTP response is not inspected, so a rejected
            # batch goes unnoticed -- confirm whether that is intended.
            requests.post(self.api_url,
                          headers=self.headers,
                          data=json.dumps({'users': batch}))

            log_progress_bar(i + 1, api_calls, 'Sending data to Iterable',
                             'Subscriber conversion complete')

        ## Delete temporary directory
        if self.data.temp_dir_exists:
            self.data.remove_directory('temp')
Ejemplo n.º 2
0
    def fetch_posts(self):
        '''
        Download every page of posts and keep the fields needed later.

        This is called from check_posts_data if the file
        'conversion-data/posts-json.txt' doesn't exist.

        Pages are requested from ``self.fetch_posts_link + self.url_args +
        <page number>``, starting at ``self.iterations['posts']``, until a
        request fails or a page comes back empty. The raw body of each page is
        written to ``self.get_file_path('posts', True)``. For every post its
        id, link, tags and categories are appended to ``self.temp_post_data``.
        Once paging stops, the accumulated list is written as JSON to
        ``self.get_file_path('posts')``.
        '''
        # Function-scope import: only the HTTPS context below needs it.
        import ssl

        # urlopen()'s ``cafile`` argument was deprecated in Python 3.6 and
        # removed in 3.13; an explicit SSLContext is the supported equivalent.
        ssl_context = ssl.create_default_context(cafile=certifi.where())

        # Loop instead of recursing once per page, so a site with many pages
        # cannot exhaust the interpreter's recursion limit.
        while True:
            page_url = (self.fetch_posts_link + self.url_args +
                        str(self.iterations['posts']))

            try:
                with urllib.request.urlopen(page_url,
                                            context=ssl_context) as res:
                    data = res.read()
                    total_pages = int(res.info().get('X-Wp-Totalpages'))

            except (urllib.error.HTTPError, urllib.error.URLError):
                # A failed request ends the paging (same as an empty page).
                data = None

            json_data = None
            if data:
                with open(self.get_file_path('posts', True), 'wb') as raw_file:
                    raw_file.write(data)

                # Parse the bytes directly rather than re-reading the file
                # with the locale's default text encoding.
                json_data = json.loads(data)

            if not json_data:
                break

            for n in json_data:
                # NOTE(review): this only skips the rest of the current page;
                # later pages are still requested -- confirm that is intended.
                if self.date_limit \
                  and dateutil.parser.parse(n['date']) < self.date_limit:
                    break

                self.temp_post_data.append({
                    "id": n['id'],
                    "link": n['link'],
                    "tags": n['tags'],
                    "category": n['categories']
                })

            log_progress_bar(self.iterations['posts'], total_pages,
                             'Fetching post data',
                             'Posts successfully fetched')

            self.iterations['posts'] += 1

        with open(self.get_file_path('posts'), 'w') as out_file:
            out_file.write(json.dumps(self.temp_post_data))
Ejemplo n.º 3
0
    def fetch_categories(self):
        '''
        Download every page of categories and index them by id.

        This is called from check_categories_data if the file
        'conversion-data/categories-json.txt' doesn't exist.

        Pages are requested from ``self.fetch_categories_link + self.url_args
        + <page number>``, starting at ``self.iterations['categories']``,
        until a request fails or a page comes back empty. The raw body of each
        page is written to ``self.get_file_path('categories', True)``. Each
        category is stored in ``self.temp_categories_data`` under its id with
        its name, slug and parent. Once paging stops, that mapping is written
        as JSON to ``self.get_file_path('categories')``.
        '''
        # Function-scope import: only the HTTPS context below needs it.
        import ssl

        # urlopen()'s ``cafile`` argument was deprecated in Python 3.6 and
        # removed in 3.13; an explicit SSLContext is the supported equivalent.
        ssl_context = ssl.create_default_context(cafile=certifi.where())

        # Loop instead of recursing once per page, so a site with many pages
        # cannot exhaust the interpreter's recursion limit.
        while True:
            page_url = (self.fetch_categories_link + self.url_args +
                        str(self.iterations['categories']))

            try:
                with urllib.request.urlopen(page_url,
                                            context=ssl_context) as res:
                    data = res.read()
                    total_pages = int(res.info().get('X-Wp-Totalpages'))

            except (urllib.error.HTTPError, urllib.error.URLError):
                # A failed request ends the paging (same as an empty page).
                data = None

            json_data = None
            if data:
                with open(self.get_file_path('categories', True),
                          'wb') as raw_file:
                    raw_file.write(data)

                # Parse the bytes directly rather than re-reading the file
                # with the locale's default text encoding.
                json_data = json.loads(data)

            if not json_data:
                break

            for n in json_data:
                self.temp_categories_data[n['id']] = {
                    "name": n['name'],
                    "slug": n['slug'],
                    "parent": n['parent']
                }

            log_progress_bar(self.iterations['categories'], total_pages,
                             'Fetching categories data',
                             'Categories successfully fetched')

            self.iterations['categories'] += 1

        with open(self.get_file_path('categories'), 'w') as out_file:
            out_file.write(json.dumps(self.temp_categories_data))
Ejemplo n.º 4
0
    def run(self):
        '''
        Tag every post from the input data with its legacy category.

        For each post row (keys 'id' and 'legacy_category'):

        * rows whose id is already in ``self.progress`` are skipped;
        * rows whose legacy category differs from ``self.legacy_category``
          are recorded as errors and skipped;
        * otherwise ``{'post': id, 'dev_tag': 'legacy-category-<category>'}``
          is POSTed to ``{self.target}/{self.post_request_path}``.

        Every post that reaches the request step is passed to
        ``self.update_progress_file``, whether or not the request raised.
        Collected errors are printed at the end, and the temp directory is
        removed when ``self.data.temp_dir_exists`` is truthy.
        '''

        self.data.check_input_files()

        # Load the post rows that have to be processed
        self.list_posts = self.data.open_data_files('posts')
        total_posts = len(self.list_posts)

        log_update(
            f'Starting conversion at {datetime.now().strftime("%H:%M:%S")}')

        errored_posts = []
        for index, post in enumerate(self.list_posts, start=1):
            post_id = int(post['id'])
            target = post['legacy_category']

            # Already handled in an earlier run
            if post_id in self.progress:
                continue

            if target != self.legacy_category:
                errored_posts.append({
                    'ID': post_id,
                    'Reason': 'Legacy category doesn\'t match'
                })
                continue

            try:
                payload = {
                    'post': post_id,
                    'dev_tag': f'legacy-category-{target}'
                }
                json_headers = {'Content-Type': 'application/json'}
                requests.post(f'{self.target}/{self.post_request_path}',
                              json=payload,
                              headers=json_headers)

            except (requests.exceptions.ConnectionError,
                    requests.exceptions.Timeout) as e:
                errored_posts.append({
                    'ID':
                    post_id,
                    'Reason':
                    'requests.exceptions.ConnectionError, requests.exceptions.Timeout'
                })
                print(
                    "requests.exceptions.ConnectionError, requests.exceptions.Timeout"
                )
                print(e)

            except (ValueError, requests.exceptions.HTTPError,
                    urllib.error.URLError) as e:
                errored_posts.append({
                    'ID':
                    post_id,
                    'Reason':
                    'ValueError, requests.exceptions.HTTPError, urllib.error.URLError'
                })
                print(
                    "ValueError, requests.exceptions.HTTPError, urllib.error.URLError"
                )
                print(e)

            except Exception as e:
                errored_posts.append({'ID': post_id, 'Reason': 'Exception'})
                print("Exception")
                print(e)

            log_progress_bar(
                index, total_posts,
                f'Updating post {post_id}: {index} of {total_posts}',
                'All posts updated')

            self.update_progress_file(post_id)

        log_update(
            f'Conversion finished at {datetime.now().strftime("%H:%M:%S")}')

        if errored_posts:
            print('Errors occrued on posts:')
            for failure in errored_posts:
                print(failure)

        if self.data.temp_dir_exists:
            self.data.remove_directory('temp')
    def convert_results(self, results_file, dev_file):
        '''
        Convert an editorial results CSV into the developer CSV.

        Reads ``{self.results_path}/{results_file}`` and writes
        ``{self.results_path}/{dev_file}`` with the columns post_id, hero_tag,
        tag_1, tag_2, legacy_category and backend_tag. For every input row:

        * 'Hero Tag', 'Tag 1' and 'Tag 2' are translated to the key of the
          entry in ``self.list_categories`` whose 'name' matches (empty when
          there is no match);
        * 'Backend Tags' is converted from ', '-separated to '|'-separated;
        * when 'post_id' or 'Legacy Category' is missing, the post is looked
          up by the slug taken from 'Link' at
          ``{self.target}/wp-json/wp/v2/posts/?slug=<slug>``, and the missing
          value is filled in from the first result.

        Rows for which no post id can be determined are left out of the
        output. Lookup failures are counted over the whole file and reported
        through the completion message passed to ``log_progress_bar``.
        '''

        columns = ('Link', 'Hero Tag', 'Tag 1', 'Tag 2', 'Backend Tags',
                   'post_id', 'Legacy Category')
        # Category slugs that count as a legacy category
        legacy_slugs = ('good-advice', 'good-food', 'good-home', 'good-looks',
                        'good-sweat', 'good-travel')

        data = []
        # Collected across all rows so that the final message reports the
        # total number of failed lookups, not just those of the last row.
        errors = []

        # newline='' is what the csv module requires for correct handling of
        # embedded newlines and platform line endings.
        with open(f'{self.results_path}/{results_file}',
                  newline='') as csv_file:
            csv_reader = csv.DictReader(csv_file)
            for index, row in enumerate(csv_reader, start=1):
                # Missing cells (None) are normalised to ''
                (post_link, hero_tag, tag_1, tag_2, backend_tags, post_id,
                 legacy_categories) = [
                     '' if row.get(column) is None else row[column]
                     for column in columns
                 ]

                # Last path segment, whether or not the link ends with '/'
                post_slug = urllib.parse.urlparse(post_link).path.rstrip(
                    '/').split('/')[-1]

                conv_hero_tag = ''
                conv_tag_1 = ''
                conv_tag_2 = ''
                for key, category in self.list_categories.items():
                    if hero_tag == category['name']:
                        conv_hero_tag = key

                    if tag_1 == category['name']:
                        conv_tag_1 = key

                    if tag_2 == category['name']:
                        conv_tag_2 = key

                    if conv_hero_tag and conv_tag_1 and conv_tag_2:
                        break

                conv_backend_tags = '|'.join(
                    backend_tags.split(', ')) if backend_tags else ''

                if post_id and legacy_categories:
                    conv_post_id = post_id
                    conv_legacy_category = legacy_categories

                else:
                    try:
                        post_request = requests.get(
                            f'{self.target}/wp-json/wp/v2/posts/?slug={post_slug}'
                        ).json()

                    except (requests.exceptions.ConnectionError,
                            requests.exceptions.Timeout):
                        post_request = None
                        errors.append("ConnectionError/Timeout")

                    except (ValueError, requests.exceptions.HTTPError,
                            urllib.error.URLError):
                        post_request = None
                        errors.append("HTTPError/URLError")

                    except Exception:
                        post_request = None
                        errors.append("exception")

                    # Only a non-empty JSON list carries a post; anything else
                    # (e.g. a JSON object) is treated as "not found" instead of
                    # failing on the [0] lookup.
                    found_post = None
                    if isinstance(post_request, list) and post_request \
                      and post_request[0]:
                        found_post = post_request[0]

                    if post_id:
                        conv_post_id = post_id
                    else:
                        conv_post_id = found_post['id'] if found_post else None

                    if legacy_categories:
                        conv_legacy_category = legacy_categories
                    else:
                        category_ids = found_post[
                            'categories'] if found_post else []
                        # Ids missing from list_categories are ignored
                        category_slugs = [
                            self.list_categories.get(str(category_id),
                                                     {}).get('slug')
                            for category_id in category_ids
                        ]
                        conv_legacy_category = '|'.join(
                            slug for slug in category_slugs
                            if slug in legacy_slugs)

                if conv_post_id:
                    data.append([
                        conv_post_id, conv_hero_tag, conv_tag_1, conv_tag_2,
                        conv_legacy_category, conv_backend_tags
                    ])

                progress_message = f'Updating CSV post {conv_post_id}: {index} of {self.total_post_conversions}'
                if errors:
                    success_message = f'CSV posts converted with {len(errors)} errors'
                else:
                    success_message = 'All CSV posts successfully converted'

                log_progress_bar(index, self.total_post_conversions,
                                 progress_message, success_message)

        with open(f'{self.results_path}/{dev_file}', 'w',
                  newline='') as new_dev_file:
            dev_writer = csv.writer(new_dev_file,
                                    delimiter=',',
                                    quotechar='"',
                                    quoting=csv.QUOTE_MINIMAL)

            dev_writer.writerow([
                'post_id', 'hero_tag', 'tag_1', 'tag_2', 'legacy_category',
                'backend_tag'
            ])
            dev_writer.writerows(data)
    def run(self):
        '''
        Push the converted category data of every post to the REST API.

        Each entry of ``self.posts`` is read positionally as (post_id,
        hero_tag, tag_1, tag_2, legacy_category, backend_tags). Entries whose
        post_id is already in ``self.progress`` are skipped. For the others
        the category list is built from:

        * the hero tag, tag 1 and tag 2 ids (when present);
        * every ancestor of the hero tag, following the 'parent' field in
          ``self.list_categories`` until it is 0;
        * every category whose slug appears in the '|'-separated
          ``legacy_category`` value.

        The result is POSTed to
        ``{self.target}/wp-json/wp/v2/posts/{post_id}?skip_apple_news=true``
        with the Authorization header taken from the ``ENCODED_LOGIN``
        environment variable. Request errors are printed and do not stop the
        run; every processed post is passed to ``self.update_progress_file``.
        '''

        log_update(
            f'Starting conversion at {datetime.now().strftime("%H:%M:%S")}')

        for index, post in enumerate(self.posts, start=1):
            (post_id, hero_tag, tag_1, tag_2, legacy_category,
             backend_tags) = post[:6]

            if post_id in self.progress:
                continue

            categories = [int(tag) for tag in (hero_tag, tag_1, tag_2) if tag]

            # Add every ancestor of the hero tag. Ids already visited are
            # remembered so that a cyclic parent chain cannot loop forever.
            hero_parent = self.list_categories[str(
                hero_tag)]['parent'] if hero_tag else 0
            seen_parents = set()
            while hero_parent != 0 and hero_parent not in seen_parents:
                seen_parents.add(hero_parent)
                categories.append(hero_parent)
                hero_parent = self.list_categories[str(hero_parent)]['parent']

            # Split once per post instead of once per category
            legacy_slugs = set(legacy_category.split('|'))
            categories.extend(
                int(cat_id)
                for cat_id, category in self.list_categories.items()
                if category['slug'] in legacy_slugs)

            try:
                data = {
                    'categories': categories,
                    'hero_tag': int(hero_tag) if hero_tag else '',
                    'tag_1': int(tag_1) if tag_1 else '',
                    'tag_2': int(tag_2) if tag_2 else '',
                    'backend_tag': backend_tags,
                    'legacy_category': legacy_category
                }

                headers = {
                    'Content-Type': 'application/json',
                    'Authorization': os.environ['ENCODED_LOGIN']
                }

                # NOTE(review): the HTTP status of the response is not
                # checked -- confirm whether a 4xx/5xx should be reported.
                requests.post(
                    f'{self.target}/wp-json/wp/v2/posts/{post_id}?skip_apple_news=true',
                    json=data,
                    headers=headers)

            except (requests.exceptions.ConnectionError,
                    requests.exceptions.Timeout) as e:
                print("******************")
                print(
                    "requests.exceptions.ConnectionError, requests.exceptions.Timeout"
                )
                print(e)
                # f-string so that a non-str post id cannot raise in here
                print(f"Errored post: {post_id}")

            except (ValueError, requests.exceptions.HTTPError,
                    urllib.error.URLError) as e:
                print("******************")
                print(
                    "ValueError, requests.exceptions.HTTPError, urllib.error.URLError"
                )
                print(e)
                print(f"Errored post: {post_id}")

            except Exception as e:
                print("******************")
                print("Exception")
                print(e)
                print(f"Errored post: {post_id}")

            log_progress_bar(
                index, self.total_post_updates,
                f'Updating post {post_id}: {index} of {self.total_post_updates}',
                'All posts updated')

            self.update_progress_file(post_id)

        log_update(
            f'Conversion finished at {datetime.now().strftime("%H:%M:%S")}')
Ejemplo n.º 7
0
  def run(self):
    '''
    Transfer every post from the input data to its target backend tag.

    For each post row (keys 'id' and 'target_backend_tag'), rows whose id is
    already in ``self.progress`` are skipped. For the others
    ``{'post': id, 'tag': target}`` is POSTed to
    ``{self.target}/wp-json/wellandgood/v1/transfer-to-backend-tags``.

    Ids whose request raised are collected and printed at the end. Every
    processed post is passed to ``self.update_progress_file``, whether or not
    the request raised. The temp directory is removed when
    ``self.data.temp_dir_exists`` is truthy.
    '''

    self.data.check_input_files()

    # Load the post rows that have to be processed
    self.list_posts = self.data.open_data_files('posts')
    total_posts = len(self.list_posts)

    log_update(f'Starting conversion at {datetime.now().strftime("%H:%M:%S")}')

    errored_posts = []
    for index, post in enumerate(self.list_posts, start=1):
      post_id = int(post['id'])
      target = int(post['target_backend_tag'])

      # Already handled in an earlier run
      if post_id in self.progress:
        continue

      try:
        payload = {'post': post_id, 'tag': target}
        json_headers = {'Content-Type': 'application/json'}
        requests.post(
          f'{self.target}/wp-json/wellandgood/v1/transfer-to-backend-tags',
          json=payload,
          headers=json_headers)

      except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e:
        errored_posts.append(post_id)
        print("requests.exceptions.ConnectionError, requests.exceptions.Timeout")
        print(e)

      except (ValueError, requests.exceptions.HTTPError, urllib.error.URLError) as e:
        errored_posts.append(post_id)
        print("ValueError, requests.exceptions.HTTPError, urllib.error.URLError")
        print(e)

      except Exception as e:
        errored_posts.append(post_id)
        print("Exception")
        print(e)

      log_progress_bar(
        index,
        total_posts,
        f'Updating post {post_id}: {index} of {total_posts}',
        'All posts updated')

      self.update_progress_file(post_id)

    log_update(f'Conversion finished at {datetime.now().strftime("%H:%M:%S")}')

    if errored_posts:
      print('Errors occrued on posts:')
      for failed_id in errored_posts:
        print(failed_id)

    if self.data.temp_dir_exists:
      self.data.remove_directory('temp')