Example #1
    def scrape_listings_via_pagin_next(self, url, page_num=None):
        # follow the 'Next' pagination tag to get the next page URL
        try:
            bs = load_page(url.split('&')[0], self.html_parser)
        except URLError:
            self.logger.error('Timeout error while scraping listings from %s',
                              url)
            return []

        paging = bs.find('a', {'class': 'next'}, href=True)
        next_page_url = None if not paging else urljoin(
            self.domain, paging['href'])

        listing_urls = []
        listings = bs.find_all('a', {'class': 'image'}, href=True)
        if listings:
            for profile in listings:
                listing_urls.append(urljoin(self.domain, profile['href']))

        # stop recursing when there is no next page or the 'next' link
        # points back at the current page (pagination has ended)
        if next_page_url and next_page_url != url:
            page_num = 1 if page_num is None else page_num + 1
            sys.stdout.write('\r[Scraping listing urls: {}]'.format(page_num))
            sys.stdout.flush()
            listing_urls += self.scrape_listings_via_pagin_next(
                next_page_url, page_num)
        sys.stdout.write('\r')

        return listing_urls
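Every example in this collection delegates fetching and parsing to a shared load_page(url, html_parser) helper that is never shown. A minimal sketch of what it might look like, assuming it wraps urllib (which would explain the URLError handling above) and BeautifulSoup; the User-Agent header and timeout are illustrative guesses:

    from urllib.request import Request, urlopen

    from bs4 import BeautifulSoup


    def load_page(url, html_parser):
        # send a browser-like User-Agent; many sites reject urllib's default
        request = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
        with urlopen(request, timeout=10) as response:
            return BeautifulSoup(response.read(), html_parser)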
Example #2
    def scrape_listings(url, rec=True):
        post_count = 0
        user_list = []
        comment_count = 0

        if '/r/' not in url:
            return post_count, comment_count, user_list

        while True:
            try:
                bs = load_page(url, Reddit.html_parser)
            except Exception as e:
                if rec:
                    return Reddit.scrape_listings(url, rec=False)

                logging.error(
                    'Unable to scrape listings for {} after 2 attempts, the reason: {}'
                    .format(url, str(e)))
                break

            try:
                posts = bs.find_all('div', {'class': 'top-matter'})
                post_count += len(posts)
                for post in posts:
                    comment = re.findall(
                        r'\d+',
                        post.find('li', {
                            'class': 'first'
                        }).find('a').text)
                    if len(comment) > 0:
                        comment_count += int(comment[0])
                    user_name = post.find('p', {
                        'class': 'tagline'
                    }).find('a').text
                    if user_name not in user_list:
                        user_list.append(user_name)

                url = bs.find('span', {
                    'class': 'next-button'
                }).find('a')['href']
            except AttributeError:
                break

        return post_count, comment_count, user_list
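A hypothetical call (the subreddit URL is illustrative; the 'top-matter' and 'next-button' classes targeted above belong to old-style reddit markup):

    posts, comments, users = Reddit.scrape_listings(
        'https://old.reddit.com/r/bitcoin/')
    print('{} posts, {} comments, {} unique posters'.format(
        posts, comments, len(users)))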
Example #3
def __scrape_listings(url):
    try:
        bs = load_page(url, __html_parser)
    except Exception:
        __logger.warning('Could not load bitcointalk page')
        return
    try:
        url_sample = re.match(r'.*topic=\d+', url).group(0)
    except AttributeError:
        __logger.warning('Found unknown bitcointalk reference')
        return

    urls = [url]
    pagins = bs.findAll('a', {'class': 'navPages'})
    for p in pagins:
        if p.has_attr('href'):
            match = re.match(r'.*topic=\d+(\.\d+)?', p['href'])
            if match:
                urls.append(match.group(0))

    last_pagin_num = 0
    for url in urls:
        try:
            n = int(url.split('.')[-1])
        except ValueError:
            continue

        if n > last_pagin_num:
            last_pagin_num = n

    # generate page URLs from offset 0 through the highest seen offset,
    # stepping by 20 posts per page
    i = 0
    urls_ = []
    while i <= last_pagin_num:
        urls_.append('{}.{}'.format(url_sample, i))
        i += 20

    # return the page URLs in random order
    return random.sample(urls_, len(urls_))
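For instance, given a thread link such as 'https://bitcointalk.org/index.php?topic=123.40', url_sample is the prefix ending in 'topic=123' and last_pagin_num is 40, so the function returns the page URLs ending in topic=123.0, topic=123.20 and topic=123.40, in random order.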
Example #4
    def scrape_listings(self, url):
        try:
            bs = load_page(url, self.html_parser)
        except Exception:
            self.logger.critical('Error while scraping listings from %s', url)
            return

        try:
            tags = bs.find('div', {
                'class': 'upcoming-sec__main'
            }).findAll('a', {'target': '_blank'})
        except AttributeError:
            self.logger.critical('Could not extract listings from %s', url)
            return

        urls = []
        for tag in tags:
            if tag.has_attr('href'):
                urls.append(tag['href'])

        return urls
Example #5
    def scrape_profile(self, url):
        data = DataKeys.initialize()
        data[DataKeys.PROFILE_URL] = url
        data[DataKeys.SOURCE] = SOURCES.ICOBAZAAR

        try:
            bs_ = load_page(url, self.__html_parser)
        except Exception:
            self.logger.error('Could not scrape profile {}'.format(url))
            return

        # scrape basic data
        try:
            data[DataKeys.NAME] = bs_.find('div', {
                'class': 'com-header__info'
            }).find('h1').text
        except AttributeError:
            self.logger.error(self.NOT_FOUND_MSG.format(url, 'ICO name'))

        try:
            data[DataKeys.DESCRIPTION] = bs_.find('div', {
                'class': 'com-header__info'
            }).find('p').text
        except AttributeError:
            self.logger.error(self.NOT_FOUND_MSG.format(
                url, 'ICO description'))

        try:
            logo_url = bs_.find('div', {
                'class': 'com-header__logo'
            }).img['src'].strip()
            data[DataKeys.LOGO_PATH] = load_image(logo_url,
                                                  ScraperBase.logo_tmp_path)
        except (AttributeError, KeyError):
            self.logger.error(self.NOT_FOUND_MSG.format(url, 'ICO logo'))
        except Exception as e:
            self.logger.error('could not download {} logo with: {}'.format(
                url, str(e)))

        try:
            data[DataKeys.OVERALL_SCORE] = bs_.find(
                'div', {'class': 'ico-rating'})['rating']
        except (AttributeError, TypeError, KeyError):
            # the rating element sometimes fails to load; retry once
            try:
                bs_ = load_page(url, self.__html_parser)
                data[DataKeys.OVERALL_SCORE] = bs_.find(
                    'div', {'class': 'ico-rating'})['rating']
            except Exception:
                self.logger.error(self.NOT_FOUND_MSG.format(url, 'Rating'))

        map_ = {
            'start': DataKeys.ICO_START,
            'end': DataKeys.ICO_END,
            'cap': DataKeys.HARD_CAP,
            'goal': DataKeys.SOFT_CAP,
            'price': DataKeys.ICO_PRICE
        }
        try:
            for a in bs_.find_all('div', {'class': 'com-sidebar__info-line'}):
                try:
                    key = map_[re.sub(':', '',
                                      a.find('span').text).strip().lower()]
                    try:
                        value = a.find('span', {
                            'class': 'com-sidebar__info-value'
                        }).text.strip()
                        data[key] = value
                    except AttributeError:
                        self.logger.error(
                            'No data for {} in sidebar'.format(key))
                except (AttributeError, KeyError):
                    self.logger.error(
                        'Key {} does not exist in sidebar'.format(
                            re.sub(':', '',
                                   a.find('span').text.strip())))
        except AttributeError:
            self.logger.error(self.NOT_FOUND_MSG.format(url, 'Sidebar'))

        try:
            data[DataKeys.WEBSITE] = bs_.find('div', {
                'class': 'com-sidebar'
            }).find('a')['href']
        except AttributeError:
            self.logger.error(self.NOT_FOUND_MSG.format(url, 'ICO website'))

        # scrape data from the "community" tab of the listing
        try:
            bs__ = load_page(url + '/community', self.__html_parser)
        except Exception:
            self.logger.error(
                'Could not scrape community of profile {}'.format(url))
            return

        # ----rating list
        try:
            rating_list = bs__.find('div', {
                'class': 'com-rating__list'
            }).find_all('div', {'class': 'com-rating__list-element'})
            for rate in rating_list:
                if rate.find('span').text.lower() == 'team':
                    data[DataKeys.TEAM_SCORE] = re.findall(
                        r'\d{1,3}%',
                        rate.find('div', {
                            'class': 'progress-bar'
                        }).find('span')['style'])[0].strip('%')
        except AttributeError:
            self.logger.error(self.NOT_FOUND_MSG.format(url, 'Team'))

        # getting social pages
        # TODO: maybe will be necessary to add other community types
        map_ = {
            'website': DataKeys.WEBSITE,
            'bitcointalk': DataKeys.BITCOINTALK_URL,
            'twitter': DataKeys.TWITTER_URL,
            'facebook': DataKeys.FACEBOOK_URL,
            'telegram': DataKeys.TELEGRAM_URL,
            'github': DataKeys.GITHUB_URL,
            'reddit': DataKeys.REDDIT_URL,
            'linkedin': DataKeys.LINKEDIN_URL,
            'slack': DataKeys.SLACK_URL
        }
        try:
            social_pages = bs__.find('div', {
                'class': 'com-social'
            }).find_all('a')
            for page in social_pages:
                try:
                    key = page.find('i')['class'][1].split('-')[1].lower()
                    if key in map_ and page.has_attr('href'):
                        value = page['href'].strip()
                        data[map_[key]] = value
                except AttributeError:
                    self.logger.error(
                        'Unsupported community type for scraping --> {}'.
                        format(page.find('i')['class'][1].split('-')[1]))
        except AttributeError:
            self.logger.error(self.NOT_FOUND_MSG.format(url, 'Social pages'))

        IcoBazaar.process(data)

        return data
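A minimal sketch of the DataKeys contract these scrape_profile methods rely on: class-level string constants plus an initialize() factory that returns a dict pre-filled with a placeholder value (None here; the real scrapers appear to use a BOOL_VALUES.NOT_AVAILABLE constant). Only a few of the keys used above are shown:

    class DataKeys:
        PROFILE_URL = 'profile_url'
        SOURCE = 'source'
        NAME = 'name'
        DESCRIPTION = 'description'
        LOGO_PATH = 'logo_path'
        OVERALL_SCORE = 'overall_score'
        WEBSITE = 'website'

        @classmethod
        def initialize(cls):
            # one placeholder entry per declared key constant
            return {value: None
                    for name, value in vars(cls).items()
                    if name.isupper()}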
Example #6
    def scrape_profile(self, url):
        data = DataKeys.initialize()
        data[DataKeys.PROFILE_URL] = url
        data[DataKeys.SOURCE] = SOURCES.ICOMARKS

        try:
            bs = load_page(url, self.html_parser)
        except Exception:
            self.logger.error('Could not extract {} page'.format(url))
            return

        # name
        try:
            data[DataKeys.NAME] = bs.find('h1', {
                'itemprop': 'name'
            }).text.strip()
        except AttributeError:
            self.logger.warning(self.NOT_FOUND_MSG.format(url, 'ICO name'))

        # logo
        try:
            logo_path = bs.find('img', {'itemprop': 'url'})['src']
            data[DataKeys.LOGO_PATH] = load_image(
                urljoin(self.domain, logo_path), ScraperBase.logo_tmp_path)
        except (AttributeError, KeyError):
            self.logger.warning(self.NOT_FOUND_MSG.format(url, 'ICO logo'))
        except Exception as e:
            self.logger.error('could not download {} logo with: {}'.format(
                url, str(e)))

        # overall scores
        try:
            data[DataKeys.OVERALL_SCORE] = bs.find(
                'div', {
                    'class': 'ico-rating-overall'
                }).text.strip()
        except Exception:
            self.logger.warning(self.NOT_FOUND_MSG.format(url, 'ICO score'))

        # other scores
        score_mapping = {
            'ICO PROFILE': DataKeys.ICO_PROFILE_SCORE,
            'TEAM & ADVISORS': DataKeys.TEAM_SCORE
        }
        try:
            ratings = bs.findAll('div', {'class': 'ico-rating__item'})
            for rating in ratings:
                title = rating.find('div',
                                    class_='ico-rating__title',
                                    text=True)
                key = None if not title else title.text.strip().upper()
                if key in score_mapping:
                    value = rating.parent.find('div',
                                               class_='ico-rating__circle')
                    data[score_mapping[key]] = value.text.strip()
        except Exception:
            self.logger.warning(self.NOT_FOUND_MSG.format(url, 'ICO score'))

        details_mapping = {
            'COUNTRY:': DataKeys.COUNTRY,
            'PRICE:': DataKeys.ICO_PRICE,
            'ACCEPTING:': DataKeys.ACCEPTED_CURRENCIES,
            'SOFT CAP:': DataKeys.SOFT_CAP,
            'HARD CAP:': DataKeys.HARD_CAP,
            'TICKER:': DataKeys.TOKEN_NAME,
            'PLATFORM:': DataKeys.PLATFORM,
            'TOKEN TYPE:': DataKeys.TOKEN_STANDARD
        }

        details_info = bs.select_one('div.icoinfo')
        try:
            desks = details_info.select('div.icoinfo-block__item')
            for detail in desks:
                title = detail.find('span', text=True)
                key = None if not title else title.text.strip().upper()
                if key in details_mapping:
                    value = title.parent.text.split(':')[1].strip()
                    data[details_mapping[key]] = value
        except Exception:
            self.logger.error(
                'Something went wrong in {}, when scraping detail rows'.format(
                    url))

        # pre ico time
        try:
            date = details_info.find('span', text='Pre-sale Time:')
            if date:
                value = date.parent.text.split(':')[1].upper()
                dates = value.split('-')
                data[DataKeys.PRE_ICO_START] = dates[0].strip()
                data[DataKeys.PRE_ICO_END] = dates[1].strip()
        except Exception:
            self.logger.warning(self.NOT_FOUND_MSG.format(
                url, 'Pre Date info'))

        # ico time
        try:
            date = details_info.find('span', text='ICO Time:')
            if date:
                value = date.parent.text.split(':')[1].upper()
                dates = value.split('-')
                data[DataKeys.ICO_START] = dates[0].strip()
                data[DataKeys.ICO_END] = dates[1].strip()
        except Exception:
            self.logger.warning(self.NOT_FOUND_MSG.format(url, 'Date info'))

        # website url
        try:
            title = details_info.find('span', text='Website:')
            if title:
                value = title.find_next_sibling('a')
                data[DataKeys.WEBSITE] = value['href']
        except Exception:
            self.logger.warning(self.NOT_FOUND_MSG.format(url, 'Website'))

        # KYC/Whitelist
        try:
            kyc_w = details_info.find('span', text='Whitelist/KYC:')
            if kyc_w:
                text = kyc_w.parent.text.split(':')[1].upper()
                data[DataKeys.KYC] = (BOOL_VALUES.YES if 'KYC' in text
                                      else BOOL_VALUES.NO)
                data[DataKeys.WHITELIST] = (BOOL_VALUES.YES
                                            if 'WHITELIST' in text
                                            else BOOL_VALUES.NO)
        except Exception:
            self.logger.warning(
                self.NOT_FOUND_MSG.format(url, 'KYC and whitelist'))

        # social links
        soc_domains = {
            'bitcointalk.org': DataKeys.BITCOINTALK_URL,
            'facebook.com': DataKeys.FACEBOOK_URL,
            'twitter.com': DataKeys.TWITTER_URL,
            't.me': DataKeys.TELEGRAM_URL,
            'reddit.com': DataKeys.REDDIT_URL,
            'github.com': DataKeys.GITHUB_URL,
            'medium.com': DataKeys.MEDIUM_URL,
            'linkedin.com': DataKeys.LINKEDIN_URL
        }
        try:
            soc_links = details_info.findAll('a',
                                             {'class': 'icoinfo-block__view'})
            for soc_link in soc_links:
                if not soc_link.has_attr('href'):
                    continue
                # match each link against the known social domains
                for domain, key in soc_domains.items():
                    if re.match(
                            r'^(https?(://)?(www\.)?)?{}/.*'.format(
                                re.escape(domain)),
                            soc_link['href']):
                        data[key] = soc_link['href']
                        break
        except Exception:
            self.logger.warning(self.NOT_FOUND_MSG.format(url, 'Soc links'))

        # description
        try:
            data[DataKeys.DESCRIPTION] = bs.find(
                'div', {'class': 'company-description'}).text.strip()
        except Exception:
            self.logger.warning(self.NOT_FOUND_MSG.format(url, 'Description'))

        IcoMarks.process(data)

        return data
Example #7
    def scrape_profile(self, url):
        data = DataKeys.initialize()
        data[DataKeys.PROFILE_URL] = url
        data[DataKeys.SOURCE] = SOURCES.TOKENTOPS

        try:
            bs = load_page(url, self.html_parser)
        except Exception:
            self.logger.error('Could not extract {} page'.format(url))
            return

        # name
        try:
            data[DataKeys.NAME] = bs.find('h1', {
                'class': 'page-details__title'
            }).text.strip()
        except AttributeError:
            self.logger.warning(self.NOT_FOUND_MSG.format(url, 'ICO name'))

        # logo
        try:
            logo_path = bs.find('img', {'class': 'page-details__logo'})['src']
            data[DataKeys.LOGO_PATH] = load_image(
                urljoin(self.domain, logo_path), ScraperBase.logo_tmp_path)
        except AttributeError:
            self.logger.warning(self.NOT_FOUND_MSG.format(url, 'ICO logo'))
        except Exception as e:
            self.logger.error('could not download {} logo with: {}'.format(
                url, str(e)))

        # overall scores
        try:
            score = bs.find('div', {
                'class': 'rating_block'
            }).find('span', {
                'class': 'rating-text'
            }).text.strip()
            if score != '0':
                data[DataKeys.OVERALL_SCORE] = score
        except (AttributeError, ValueError):
            self.logger.warning(self.NOT_FOUND_MSG.format(
                url, 'Overall score'))

        # social links
        soc_mapping = {
            'Facebook': DataKeys.FACEBOOK_URL,
            'Github': DataKeys.GITHUB_URL,
            'Blog': DataKeys.MEDIUM_URL,
            'Telegram': DataKeys.TELEGRAM_URL,
            'Reddit': DataKeys.REDDIT_URL,
            'Bitcoin Talk': DataKeys.BITCOINTALK_URL,
            'Website': DataKeys.WEBSITE,
            'Linkedin': DataKeys.LINKEDIN_URL,
            'Twitter': DataKeys.TWITTER_URL
        }

        try:
            soc_tags = bs.find('div', {'class': 'page-details__main'})
            if soc_tags:
                for key, data_key in soc_mapping.items():
                    target = soc_tags.find('a', {'title': key})
                    if target and target.has_attr('href'):
                        data[data_key] = target['href']
        except Exception:
            self.logger.error(
                'Something went wrong in {}, when scraping social links'.
                format(url))

        # details
        details_mapping = {
            'START DATE': DataKeys.ICO_START,
            'CLOSE DATE': DataKeys.ICO_END,
            'TOKEN SYMBOL': DataKeys.TOKEN_NAME,
            'SMART CONTRACT BLOCKCHAIN': DataKeys.PLATFORM,
            'AMOUNT RAISED': DataKeys.RAISED
        }
        try:
            details = bs.findAll('div', {'class': 'page-details__info-row'})
            for detail in details:
                title = detail.find('h3',
                                    {'class': 'page-details__info-title'},
                                    text=True)
                if title and title.text.strip().upper() in details_mapping:
                    value = title.find_next_sibling(
                        'div', {'class': 'page-details__info-descr'},
                        text=True)
                    if value:
                        data[details_mapping[
                            title.text.strip().upper()]] = value.text.strip()
        except Exception:
            self.logger.error(
                'Something went wrong in {}, when scraping detail rows'.format(
                    url))

        # description
        try:
            div_tag = bs.find('div',
                              {'class': 'show-more-wrap show-more--big2'})
            description_tag = div_tag.find('h2', text=True)
            if description_tag:
                data[DataKeys.DESCRIPTION] = description_tag.text.strip()
        except Exception:
            self.logger.warning(self.NOT_FOUND_MSG.format(url, 'Description'))

        # review scores
        try:
            review_sum = 0
            total_reviews = 0
            review_blocks = bs.findAll('div', {'id': 'section-review-block'})
            reviews = []
            for block in review_blocks:
                reviews += block.findAll('div', {'class': 'rat-stars'})

            for review in reviews:
                score = review.find('span')
                if score and score.has_attr('style'):
                    try:
                        value = int(
                            re.search(r'\d(\d{1,2})?', score['style']).group())
                        if value == 0:
                            continue
                        review_sum += value
                        total_reviews += 1
                    except Exception:
                        self.logger.warning(
                            'Could not find score percentage from {}'.format(
                                url))

            if total_reviews != 0 and review_sum != 0:
                data[DataKeys.USER_SCORE] = review_sum // total_reviews
        except Exception:
            pass

        TokenTops.process(data)
        return data
Example #8
    def scrape_profile(self, url):
        data = DataKeys.initialize()
        data[DataKeys.PROFILE_URL] = url
        data[DataKeys.SOURCE] = SOURCES.ICORATING

        try:
            bs = load_page(url, self.html_parser)
        except Exception:
            self.logger.error('Could not scrape profile {}'.format(url))
            return

        try:
            text = bs.find('div', {'class': 'h1'}).find('h1').text
            # from "ICO NAME (ICN)" to "ICO NAME"
            data[DataKeys.NAME] = text.split('(')[0].strip()
        except Exception:
            self.logger.error(self.NOT_FOUND_MSG.format(url, 'ICO name'))

        try:
            ratings_tag = bs.findAll('span', {'class': 'title'}, text=True)
            for rating in ratings_tag:
                # RISK
                if rating.text.upper() == 'RISK SCORE':
                    risk = rating.parent.find('span', {'class': 'score'},
                                              text=True)
                    if risk:
                        risk_text = risk.text.split('/')
                        if risk_text and len(risk_text) == 2:
                            data[DataKeys.RISK_SCORE] = float(
                                risk_text[0].strip())

                # Hype
                if rating.text.upper() == 'HYPE SCORE':
                    hype = rating.parent.find('span', {'class': 'score'},
                                              text=True)
                    if hype:
                        hype_text = hype.text.split('/')
                        if hype_text and len(hype_text) == 2:
                            data[DataKeys.HYPE_SCORE] = float(
                                hype_text[0].strip())

                # Investment
                if rating.text.upper() == 'INVESTMENT RATING':
                    inv = rating.parent.find('span', {'class': 'name'},
                                             text=True)
                    if inv:
                        value = inv.text.upper()
                        investment_ratings = {
                            'POSITIVE+': 8,
                            'POSITIVE': 7,
                            'STABLE+': 6,
                            'STABLE': 5,
                            'RISKY+': 4,
                            'RISKY': 3,
                            'RISKY-': 2,
                            'NEGATIVE': 1,
                            'NEGATIVE-': 0,
                            'NA': BOOL_VALUES.NOT_AVAILABLE
                        }
                        # use .get() so unknown labels don't raise KeyError,
                        # and check for None so a 0 score is still recorded
                        score_value = investment_ratings.get(value)
                        if score_value is not None:
                            data[DataKeys.ROI_SCORE] = score_value
        except Exception:
            self.logger.warning('Exception while scraping {} from {}'.format(
                'rating info', url))

        link_tags = bs.findAll('a', {'target': '_blank'}, text=False)
        soc_mapping = {
            'FACEBOOK': DataKeys.FACEBOOK_URL,
            'GITHUB': DataKeys.GITHUB_URL,
            'MEDIUM': DataKeys.MEDIUM_URL,
            'INSTAGRAM': DataKeys.INSTAGRAM_URL,
            'TELEGRAM': DataKeys.TELEGRAM_URL,
            'REDDIT': DataKeys.REDDIT_URL,
            'BTCTALK': DataKeys.BITCOINTALK_URL,
            'WEBSITE': DataKeys.WEBSITE,
            'LINKEDIN': DataKeys.LINKEDIN_URL,
            'TWITTER': DataKeys.TWITTER_URL
        }

        for link_tag in link_tags:
            try:
                text = link_tag.text.strip().upper()
                key = soc_mapping[text]
                data[key] = link_tag['href']
            except (AttributeError, KeyError):
                continue

        # logo link
        try:
            data[DataKeys.LOGO_PATH] = load_image(
                urljoin(
                    self.domain,
                    bs.find('div', {
                        'class': 'share'
                    }).find_previous_sibling('img')['src']),
                ScraperBase.logo_tmp_path)
        except (AttributeError, KeyError):
            self.logger.warning(self.NOT_FOUND_MSG.format(url, 'logo url'))
        except Exception as e:
            self.logger.error('could not download {} logo with: {}'.format(
                url, str(e)))

        # description
        try:
            data[DataKeys.DESCRIPTION] = bs.find(
                'td', text='Description:').find_next_sibling().text.strip()
        except Exception:
            self.logger.warning(self.NOT_FOUND_MSG.format(url, 'description'))

        try:
            bs = load_page(url + '/details', self.html_parser)
        except Exception:
            self.logger.error(
                self.NOT_FOUND_MSG.format(url + '/details', 'info table'))
            # without the details page there is nothing more to scrape
            IcoRating.process(data)
            return data

        info_map = {
            'Pre-ICO start date:': DataKeys.PRE_ICO_START,
            'Pre-ICO end date:': DataKeys.PRE_ICO_END,
            'Hard cap:': DataKeys.HARD_CAP,
            'ICO start date:': DataKeys.ICO_START,
            'ICO end date:': DataKeys.ICO_END,
            'Soft cap:': DataKeys.SOFT_CAP,
            'Ticker:': DataKeys.TOKEN_NAME,
            'ICO Platform:': DataKeys.PLATFORM,
            'Token price in USD:': DataKeys.ICO_PRICE,
            'Accepted Currencies:': DataKeys.ACCEPTED_CURRENCIES,
            'Country Limitations:': DataKeys.COUNTRIES_RESTRICTED,
            'Token Standard:': DataKeys.TOKEN_STANDARD,
            'Registration Country:': DataKeys.COUNTRY
        }

        rows = bs.find_all('td', text=re.compile('.*:$'))
        for row in rows:
            try:
                key = row.text.strip()
                if key in info_map:
                    value = row.find_next_sibling().text.strip()
                    data[info_map[key]] = value
            except AttributeError:
                continue

        IcoRating.process(data)

        return data