Example No. 1
    def _process_google_item(self, item, spider):
        # Building a Query never raises IntegrityError (that only surfaces
        # on flush/commit), so the original try/except around this was dead
        # code and rebound q to a Company instance, breaking q.count() below.
        q = session.query(Company).filter(
            Company.name == item['company_name'])
        logging.info(item)
        if q.count() and item['update']:
            c = q.first()
            website = 'NA'
            if c.website:
                website = c.website
            elif c.website_long:
                website = urlparse.urlsplit(c.website_long).netloc
            if c.manual_entry == 'Yes':
                q.update({
                    'website': item['url'],
                    'website_long': item['url_long'],
                    'website_updated': datetime.now(),
                    'website_old': website,
                    'last_update': datetime.now(),
                    'manual_entry': 'manual',
                })
                logging.info("MANUAL")
                logging.info("MANUAL")
                logging.info("MANUAL")
                logging.info("MANUAL")

            elif c.manual_entry == 'old':
                q.update({
                    'website': item['url'],
                    'website_long': item['url_long'],
                    'website_updated': datetime.now(),
                    'website_old': website,
                    'last_update': datetime.now(),
                    'manual_entry': 'No'
                })
                session.commit()

            else:
                dn = datetime.now()
                update_item = {
                    'website': item['url'],
                    'website_long': item['url_long'],
                    'website_updated': dn,
                    'website_old': website,
                    'last_update': dn,
                }
                logging.info(update_item)
                q.update(update_item)
        elif not q.count():
            new_company = Company(name=item['company_name'],
                                  website=item['url'],
                                  website_long=item['url_long'])
            session.add(new_company)
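
A self-contained sketch of the query/count/update-or-add pattern this handler relies on, using an in-memory SQLite database. The Company model below is a minimal stand-in for the project's real schema, not the original code:

from sqlalchemy import Column, Integer, String, create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

Base = declarative_base()

class Company(Base):
    __tablename__ = 'company'
    id = Column(Integer, primary_key=True)
    name = Column(String, unique=True)
    website = Column(String)

engine = create_engine('sqlite://')
Base.metadata.create_all(engine)
session = sessionmaker(bind=engine)()

def upsert_company(name, website):
    q = session.query(Company).filter(Company.name == name)
    if q.count():                        # row exists: update it in place
        q.update({'website': website})
    else:                                # no row yet: insert a new one
        session.add(Company(name=name, website=website))
    session.commit()

upsert_company('ACME GmbH', 'acme.example')
upsert_company('ACME GmbH', 'acme.example/de')   # second call updates
print(session.query(Company).count())            # -> 1
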
Example No. 2
    def _log_update(self, log):
        # The CalculationsTime table holds a single row: insert it on the
        # first run, otherwise refresh it in place.
        calc_log = session.query(CalculationsTime).first()
        if not calc_log:
            calc_log = CalculationsTime(**log)
            session.add(calc_log)
        else:
            # update() without a filter targets every row; that is only
            # safe here because the table never holds more than one.
            session.query(CalculationsTime).update(log)
        session.commit()
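
If the table could ever grow a second row, pinning the update to the row just fetched would be safer. A hedged variant, assuming a primary-key column named id (the real column name is not shown above):

        else:
            # Hypothetical: target only the row fetched by first().
            session.query(CalculationsTime).filter(
                CalculationsTime.id == calc_log.id).update(log)
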
Example No. 3
    def log_start(self, type, description='', additional_data=''):
        # Record the start of an execution run. `type` shadows the builtin
        # but is kept to preserve the original signature.
        le = LogExecutions(
            type=type,
            description=description,
            start_datetime=datetime.now(),
            additional_data=additional_data)
        session.add(le)
        session.commit()
        # Keep a handle on the row so the run can be closed out later.
        self.current_session = le
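
The stored handle suggests a matching finisher. The source does not include one; this is a hypothetical sketch only, assuming an end_datetime column exists on LogExecutions:

    def log_end(self):
        # Hypothetical counterpart (not in the source): close out the run
        # opened by log_start, assuming an `end_datetime` column.
        if self.current_session is not None:
            self.current_session.end_datetime = datetime.now()
            session.commit()
            self.current_session = None
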
Example No. 4
    def _process_evaluation_item(self, item, spider):
        q = session.query(DbGoogleEvaluation).filter(
            DbGoogleEvaluation.g_company_website == item['company_website'],
            DbGoogleEvaluation.g_search_word == item['search_word'])
        if q.count() and item['update']:
            q.update({
                # Cast to int to match the insert branch below.
                'g_found_result': int(item['found_result']),
                'g_search_url': item['search_url'],
                'g_last_update': datetime.fromtimestamp(item['last_update']),
            })
        elif not q.count():
            new_google_ev = DbGoogleEvaluation(
                g_company_website=item['company_website'],
                g_search_word=item['search_word'],
                g_found_result=int(item['found_result']),
                g_search_url=item['search_url'],
                g_last_update=datetime.fromtimestamp(item['last_update']),
                g_timestamp=datetime.fromtimestamp(item['timestamp']))
            session.add(new_google_ev)
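
Calling q.count() twice costs two extra round-trips. A hedged refactor using Query.one_or_none(), assuming the website/search-word pair is unique in the table:

        row = q.one_or_none()  # raises MultipleResultsFound if not unique
        if row and item['update']:
            row.g_found_result = int(item['found_result'])
            row.g_search_url = item['search_url']
            row.g_last_update = datetime.fromtimestamp(item['last_update'])
        elif row is None:
            session.add(DbGoogleEvaluation(
                g_company_website=item['company_website'],
                g_search_word=item['search_word'],
                g_found_result=int(item['found_result']),
                g_search_url=item['search_url'],
                g_last_update=datetime.fromtimestamp(item['last_update']),
                g_timestamp=datetime.fromtimestamp(item['timestamp'])))
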
Example No. 5
    def process_item(self, item, spider):
        logging.info("!!!!!!!!!!ITEM!!!!!!!!!!!!")
        logging.info(item)
        update = item.get('update')
        company_name = item.get('company_name')
        xing_page_url = item.get('xing_page_url')
        impressum_url = item.get('impressum_url')
        about_us = item.get('about_us')
        description = about_us[:8000] if about_us else None

        if item.get('partial_update'):
            item = dict(street_xing='',
                        city_xing='',
                        description_xing='',
                        zipcode_xing='',
                        country_xing='',
                        tel_xing='',
                        fax_xing='',
                        company_email_xing='',
                        industry_xing='',
                        established_in_xing=None,
                        products_xing='',
                        employees_size_xing='',
                        company_website_x='N/A',
                        last_update_x=func.now(),
                        employees_group_xing_x='')
        else:
            item = dict(
                street_xing=item.get('street'),
                city_xing=item.get('city'),
                description_xing=description,
                zipcode_xing=item.get('postal_code'),
                country_xing=item.get('country'),
                tel_xing=item.get('phone'),
                fax_xing=item.get('fax'),
                company_email_xing=item.get('email'),
                industry_xing=item.get('industry'),
                established_in_xing=item.get('established'),
                products_xing=item.get('products'),
                employees_size_xing=item.get('employees_number'),
                company_website_x=item.get('url'),
                last_update_x=func.now(),
                employees_group_xing_x=item.get('registered_employees_number'))

        company = session.query(Company).filter_by(name=company_name).first()
        if not company:
            return

        if update:
            session.query(XingCompanyDb).filter(
                XingCompanyDb.xc_id == company.id).update(
                    item, synchronize_session=False)
        else:
            new_entry = XingCompanyDb(company_name_x=company_name,
                                      timestamp_x=func.now(),
                                      xc_id=company.id,
                                      **item)
            session.add(new_entry)

        company.last_update = func.now()
        company.xing_page_update = func.now()
        company.xing_page = xing_page_url
        company.impressum_link = impressum_url
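
Nothing in this method commits the session. One hedged possibility, an assumption rather than something the source shows, is that the pipeline flushes when the spider closes via Scrapy's close_spider hook:

    def close_spider(self, spider):
        # Hypothetical wiring (not in the source): persist everything
        # process_item queued up; roll back on failure so the session
        # stays usable.
        try:
            session.commit()
        except Exception:
            session.rollback()
            raise
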
Example No. 6
    def process_item(self, item, spider):
        logging.info("!!!!!!!!!!ITEM!!!!!!!!!!!!")
        logging.info(item)
        logging.info(spider)
        update = item['update']
        company_name = item['company_name'].decode("utf-8")  # bytes -> unicode (Python 2)
        logging.info('Pipeline company name: %s', company_name)
        company_website = item['company_website']
        headquarters = item.get('sitz', '')[:50] if item.get('sitz') else None
        manual_update_item = {}
        if item.get('wiki_company_website') and len(
                item['wiki_company_website']) > 130:
            parsed_url = urlparse.urlparse(item['wiki_company_website'])
            item['wiki_company_website'] = '{protocol}://{hostname}'.format(
                protocol=parsed_url.scheme, hostname=parsed_url.hostname)

        if item.get('partial_update'):
            item = dict(summary_wikipedia_w='',
                        categories_wikipedia_w='',
                        revenue_wikipedia_w='',
                        revenue_currency_wiki_w='',
                        branch_wikipedia_w='',
                        wiki_url_w='N/A',
                        headquarters_wiki_w='',
                        employees_wikipedia_w='',
                        company_website_w='',
                        last_update_w=func.now())
            logging.info('PIPELINE ITEM DICT 1')
            logging.info(item)
        else:
            item = dict(summary_wikipedia_w=item['summary'],
                        categories_wikipedia_w=item['categories'],
                        revenue_wikipedia_w=item.get('revenue', ''),
                        revenue_currency_wiki_w=item.get('currency', ''),
                        branch_wikipedia_w=item.get('branche', ''),
                        wiki_url_w=item['url'],
                        headquarters_wiki_w=headquarters,
                        employees_wikipedia_w=item.get('mitarbeiter', ''),
                        company_website_w=item.get('wiki_company_website', ''),
                        last_update_w=func.now())
            logging.info('PIPELINE ITEM DICT 2')
            logging.info(item)

            # `item` was just rebuilt with the *_w keys, so reading the raw
            # spider keys (item['summary'], ...) here would raise KeyError.
            # The manual-update dict is the new dict minus the wiki URL.
            manual_update_item = {
                k: v for k, v in item.items() if k != 'wiki_url_w'}

        company = session.query(Company).filter_by(name=company_name,
                                                   website=company_website)
        logging.info('PIPELINE COMPANY 1')
        logging.info(company)
        if not company.count():
            company = session.query(Company).filter_by(name=company_name)
            logging.info('PIPELINE COMPANY 2')
            logging.info(company)

        company = company.first()
        logging.info('PIPELINE COMPANY first')
        logging.info(company)
        if not company:
            # No matching Company row; without this guard company.id below
            # would raise AttributeError.
            return
        wiki_company = session.query(WikipediaDb).filter(
            WikipediaDb.company_name_w == company_name)
        new_entry = WikipediaDb(company_name_w=company_name,
                                timestamp_w=func.now(),
                                wc_id=company.id,
                                **item)

        if update and wiki_company.count() and (
                not company.is_wiki_manualy_u or spider.is_manual_update_wiki):
            if wiki_company[0].manual_entry in ("Yes", "manual", "confirmed"):
                # Manually curated rows keep their wiki URL; the rest of
                # the record is refreshed.
                wiki_company.update(manual_update_item,
                                    synchronize_session=False)
            else:
                wiki_company.update(item, synchronize_session=False)
        elif not wiki_company.count():
            session.add(new_entry)

        if not company.is_wiki_manualy_u or spider.is_manual_update_wiki:
            company.is_wiki_manualy_u = True
            company.last_update = func.now()
            company.wiki_evaluation = func.now()
            company.wikipedia_url = item['wiki_url_w']
Example No. 7
def create_report(companies, account_data=None, account_headers=None, total_fields=None, data_links=None,
                  google_analytics_companies=None, dates=None):
    """
    Creates a report and saves it locally.
    :param companies: mapping of company name to company entry for requests made during the specified range
    """
    # Replace the original mutable default arguments; note that total_fields
    # is used as a dict below (total_fields.get), not a list.
    account_data = account_data or []
    account_headers = account_headers or []
    total_fields = total_fields or {}
    data_links = data_links or {}
    dates = dates or {}
    logger.debug(companies)

    file_name = settings.REPORTS_FILE.format(now=datetime.datetime.now().strftime("%Y_%m_%d-%H_%M_%S"))
    path_to_xl = settings.rel('mx_crm', settings.REPORTS_FOLDER, file_name)
    logger.debug('Export excel file: {}'.format(path_to_xl))

    wb = Workbook()
    ws = wb.create_sheet('Report')

    logger.info('Saving report to the local excel file')
    # Copy the settings list; the original aliased it, so += mutated
    # settings.NEW_WORKBOOK_HEADERS itself.
    wb_headers = list(settings.NEW_WORKBOOK_HEADERS)
    if account_headers:
        wb_headers += account_headers
    if total_fields:
        wb_headers += settings.TOTAL_HEADERS
    wb_headers += settings.RATING_HEADERS
    ws.append(wb_headers)

    companies_info = get_companies_info(companies)
    logger.info('companies_info: %s', companies_info)
    companies_info_manual_id = get_company_table_info(companies)
    logger.info('companies_info_manual_id: %s', companies_info_manual_id)
    # manual
    companies_info_websites = get_companies_info_websites(companies)
    logger.debug('Companies: {}'.format(len(companies_info)))

    companies_wiki_info = get_wiki_info(companies)
    logger.debug('Wiki companies: {}'.format(len(companies_wiki_info)))

    companies_xing_info = get_xing_info(companies)
    logger.debug('Xing companies: {}'.format(len(companies_xing_info)))

    companies_names = set()
    websites_for_rating = set()
    for c in companies_info.values():
        if c.website:
            websites_for_rating.add(c.website)
        if c.name:
            companies_names.add(c.name)
    rater = SquirrelRating()
    rating_data = rater.calc(companies=companies_names, websites=websites_for_rating)
    company_manual_account = get_manual_account(companies_names)
    variables_data = rater.get_rating_variables(companies, websites_for_rating)

    try:
        counter = 0
        for company_name, company in sorted(companies.items(), key=lambda x: x[1].session_length, reverse=True):
            ws.row_dimensions[counter].collapsed = True
            address = company.full_address
            country = company.country
            wiki_info = companies_wiki_info.get(company_name)
            xing_info = companies_xing_info.get(company_name)
            company_info = companies_info.get(company_name)
            company_table_manual_id = companies_info_manual_id.get(company_name)
            website = company_info.website if company_info else ''
            full_website = re.sub('www\d?\.', '', website).rstrip('/').lower()

            prepared_company_name = company_name
            xing_page = company_info.xing_page if company_info else None
            session_length = company.session_length

            for session in company.sessions:
                for request in session.requests:
                    access_history = MxCrmAccessHistory(
                        company_name=company.company_name,
                        a_h_sid=counter,
                        mx_crm_visited_page=request.title,
                        mx_crm_referrer=request.url[:255],
                        mx_crm_session_date=datetime.datetime.fromtimestamp(int(request.timestamp)).strftime(
                            '%Y-%m-%d'),
                        mx_crm_session_time=datetime.datetime.fromtimestamp(int(request.timestamp)).strftime(
                            '%H:%M:%S'),
                        mx_crm_ip_vlan=request.hostname
                    )
                    alchemy_session.add(access_history)
                    alchemy_session.commit()
                    company_table_info = get_manual_website(company.company_name)
                    access_dt = datetime.datetime.fromtimestamp(request.timestamp).strftime('%Y-%m-%d %H:%M:%S')

                    # Default first so the row below cannot raise NameError
                    # when this company has no totals entry.
                    total_session_length = ''
                    if company_name in total_fields:
                        obj = total_fields.get(company_name, {})
                        total_session_length = datetime.timedelta(seconds=obj.get('time') or 0)
                    link = data_links.get(company.company_name.lower())
                    c_id = alchemy_session.query(Company.id).filter(Company.name == company.company_name)

                    try:
                        company_id = c_id[0][0]
                        webinterface_link = "http://192.168.0.141:8000/squirrel/accounts/{}/".format(company_id)
                    except IndexError:
                        # Fall back to a search link when no Company row was
                        # found; the original rebuilt this link after the
                        # try/except unconditionally, defeating the direct link.
                        webinterface_link = "http://192.168.0.141:8000/squirrel/accounts/search/{}/".format(company.company_name)

                    query_link = alchemy_session.query(Company).filter(Company.name == company.company_name)
                    query_link.update({Company.d_crm_link: link}, synchronize_session="fetch")
                    alchemy_session.commit()
                    row = ['=HYPERLINK("{}", "{}")'.format(webinterface_link, company.company_name),
                           company_table_info.get(company.company_name), website, session_length, total_session_length,
                           rating_data.get(company.company_name), address, request.title,
                           request.url, access_dt, country]
                    if wiki_info:
                        row.extend([
                            wiki_info.manual_entry,
                            wiki_info.wiki_url_w,
                            convert_to_float(wiki_info.revenue_wikipedia_w),
                            wiki_info.revenue_currency_wiki_w,
                            convert_to_int(wiki_info.employees_wikipedia_w),
                            wiki_info.categories_wikipedia_w,
                            wiki_info.branch_wikipedia_w,
                            wiki_info.summary_wikipedia_w,
                        ])
                    else:
                        row.extend([''] * 8)

                    if xing_info:
                        # Any falsy id (None, '', u'') collapses to u'NONE';
                        # also guard against a missing manual-id record.
                        c_t_manual_id = (company_table_manual_id.manual_account_id
                                         if company_table_manual_id and company_table_manual_id.manual_account_id
                                         else u'NONE')
                        row.extend([
                            xing_info.manual_entry,
                            xing_page,
                            xing_info.country_xing,
                            xing_info.employees_group_xing_x,
                            xing_info.employees_size_xing,
                            xing_info.description_xing,
                            xing_info.industry_xing,
                            c_t_manual_id
                        ])
                    else:
                        row.extend([''] * 8)

                    if full_website in account_data or prepared_company_name in account_data:
                        data_to_extend = []
                        for key in account_headers:
                            if full_website in account_data:
                                value = account_data[full_website].get(key, '')
                            else:
                                value = account_data[prepared_company_name].get(key, '')
                            data_to_extend.append(value)
                        row.extend(data_to_extend)
                    elif account_headers:
                        row.extend([''] * len(account_headers))

                    if company_name in total_fields:
                        obj = total_fields.get(company_name, {})
                        row.extend([
                            datetime.timedelta(seconds=obj.get('time') or 0),
                            convert_to_int(obj.get('visited')),
                            obj.get('last_visited'),
                        ])
                    else:
                        row.extend([''] * len(settings.TOTAL_HEADERS))

                    rcd_name = companies_info.get(company_name)
                    if rcd_name and rcd_name.name:
                        if wiki_info:
                            row.extend([
                                wiki_info.manual_entry
                            ])
                        else:
                            row.extend([""])

                        if xing_info:
                            row.extend([
                                xing_info.manual_entry
                            ])
                        else:
                            row.extend([""])
                        query = alchemy_session.query(Company).filter(Company.name == rcd_name.name)
                        # The original built this mapping twice (an unused
                        # dict_for_save plus rating_update_info); keep one.
                        var_info = variables_data.get(rcd_name.name, {})
                        rating_update_info = dict(
                            mx_crm_location_level=var_info.get('location'),
                            mx_crm_branch_level=var_info.get('branch'),
                            mx_crm_google_evaluation=float(var_info.get('google_ev')),
                            mx_crm_wiki_rating_points=var_info.get('wiki_size'),
                            mx_crm_xing_rating_points=var_info.get('xing_size'),
                            mx_crm_revenue_level=var_info.get('revenue_point'))
                        query.update(rating_update_info, synchronize_session=False)
                        # Offsets into the row assembled above; they depend
                        # on the header layout defined in settings.
                        relation_ship_type = row[36]
                        account_name = row[27]
                        account_owner = row[28]
                        abc_rating = row[38]

                        closed_activity_type = row[31]
                        closed_date = row[32] if row[32] != '' else None
                        open_activity_type = row[33]
                        schedule_date = row[34] if row[34] != '' else None
                        total_session_length = row[39]
                        total_visited_page = row[40]
                        last_visit_time = row[41]

                        alchemy_session.commit()
                        dynamics_crm_info = dict(d_crm_relationship_type=relation_ship_type,
                                                 d_crm_account_name=account_name,
                                                 d_crm_account_owner=account_owner,
                                                 d_crm_abc_rating=abc_rating,
                                                 d_crm_closed_activity_type=closed_activity_type,
                                                 d_crm_open_activity_type=open_activity_type,
                                                 d_crm_closed_date=closed_date,
                                                 d_crm_schedule_date=schedule_date,
                                                 mx_crm_total_session_length=total_session_length,
                                                 mx_crm_total_visited_pages=total_visited_page,
                                                 mx_crm_last_visit=last_visit_time,
                                                 squirrel_rating=rating_data.get(rcd_name.name))

                        query_dynamics_crm = alchemy_session.query(Company).filter(Company.name == rcd_name.name)
                        query_dynamics_crm.update(dynamics_crm_info, synchronize_session=False)
                        alchemy_session.commit()
                        row.extend([
                            rating_data.get(rcd_name.name, 'N/C') if rating_data.get(
                                rcd_name.name) is not None else 'N/C',
                            var_info.get('location'),
                            var_info.get('branch'),
                            var_info.get('google_ev'),
                            var_info.get('wiki_size'),
                            var_info.get('xing_size'),
                            var_info.get('revenue_point'),
                        ])
                    else:
                        row.extend(['N/C'] * len(settings.RATING_HEADERS))

                    try:
                        ws.append(row)
                    except ValueError as e:
                        logger.warning('Could not append row: %s', e)
                    counter += 1
                    if not ws.row_dimensions[counter - 1].collapsed:
                        ws.row_dimensions[counter].hidden = True
                        ws.row_dimensions[counter].outlineLevel = 1

        wb.save(path_to_xl)

    except KeyError as e:
        logger.error(e)
    logger.info('Local file has been updated')
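
convert_to_int and convert_to_float are used above but not defined in this excerpt. A hypothetical sketch of what they might look like; the names match the call sites, but the bodies are assumptions:

def convert_to_float(value):
    # Hypothetical helper (the real one is not shown): coerce messy
    # spreadsheet/DB values to float, falling back to the raw value.
    try:
        return float(str(value).replace(',', '.'))
    except (TypeError, ValueError):
        return value


def convert_to_int(value):
    # Hypothetical helper: same idea for integers.
    try:
        return int(float(str(value).replace(',', '.')))
    except (TypeError, ValueError):
        return value
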
Example No. 8
def get_drupal_sessions(start_time, end_time):
    """
    Extracts request sessions from accesslog table.
    :param start_time: time to extract requests from
    :param end_time: time to extract requests to
    :return: Dictionary with sessions info separated by companies.
    """
    logger.info("Started sessions extraction")

    access_hosts = session.query(
        Accesslog.timestamp, Accesslog.hostname, Accesslog.path, Accesslog.url,
        Accesslog.title
    ).filter(
        between(Accesslog.timestamp, func.unix_timestamp(start_time),
                func.unix_timestamp(end_time)),
        Accesslog.title != 'Generate image style',
        Accesslog.hostname.notin_(settings.IPS_BLACKLIST)).order_by(
            Accesslog.hostname, Accesslog.timestamp)
    accesslog = [Access(*res) for res in access_hosts]

    blacklist = {
        tup[0].lower()
        for tup in session.query(Company.name).filter(
            Company.type_main.in_(['Blacklist', 'Spam', 'Provider']))
    }

    ips_info = {
        tup[0]: tup[1:]
        for tup in session.query(DbIpDatabase.ip_ip, DbIpDatabase.ip_country,
                                 DbIpDatabase.ip_name, DbIpDatabase.ip_name_2,
                                 DbIpDatabase.ip_address)
    }

    res = {}
    drupal_session = DrupalSession()
    session_length = 0
    # Last index visited by the loop below, which iterates over
    # accesslog[:-1] so each step can peek at the next request.
    len_accesslog = len(accesslog[:-1]) - 1
    for index, request in enumerate(accesslog[:-1]):
        host = ip_digits(request.hostname)
        access_datetime = datetime.fromtimestamp(int(request.timestamp))

        next_request = accesslog[index + 1]
        next_request_host = ip_digits(next_request.hostname)
        next_request_access_datetime = datetime.fromtimestamp(
            int(next_request.timestamp))

        difference = next_request_access_datetime - access_datetime

        is_continue = False
        if host == next_request_host and difference.seconds < settings.MAXIMUM_DIFFERENCE_BETWEEN_SESSIONS.seconds:
            session_length += difference.seconds
            is_continue = True
        elif host == next_request_host:
            session_length += settings.LONG_SESSION_DEFAULT
            is_continue = True
        else:
            session_length += settings.LONG_SESSION_DEFAULT

        if index and host == ip_digits(
                accesslog[index - 1].hostname) and host != next_request_host:
            drupal_session.append(request)
        elif host == next_request_host:
            drupal_session.append(request)
            is_continue = True

        if is_continue and index != len_accesslog:
            continue

        if host in ips_info:
            country, company_name, address_result, full_address_result = ips_info[
                host]
        else:
            country = company_name = address_result = full_address_result = ''
            try:
                country, company_name, address_result, full_address_result = get_whois(
                    host)
            except Exception as e:
                logger.error(
                    'get_whois function (RIPE) got an error for host: {}\nError: {}'
                    .format(host, str(e)))
                # The finally block below still runs before this continue,
                # so failed lookups are cached with empty fields.
                continue
            finally:
                address_result = address_result[:250]
                logger.debug(address_result)
                full_address_result = full_address_result[:350]

                new_entry = DbIpDatabase(ip_ip=host,
                                         ip_country=country,
                                         ip_name=company_name,
                                         ip_name_2=address_result,
                                         ip_address=full_address_result,
                                         ip_host=host,
                                         ip_timestamp=func.now())
                session.add(new_entry)

                ips_info[host] = (country, company_name, address_result,
                                  full_address_result)

        company_name = company_name.lower()

        if company_name and country in settings.RELEVANT_COUNTRIES \
                and company_name not in settings.PROVIDERS_BLACKLIST \
                and company_name not in blacklist \
                and not any(word in company_name for word in settings.COMPANIES_BLACKLIST) \
                and not any(re.search(regexp, company_name) for regexp in settings.PROVIDERS_BLACKLIST_REGEXPS) \
                and not any(re.search(regexp, company_name) for regexp in settings.COMPANIES_BLACKLIST_REGEXPS):

            if company_name not in res:
                res[company_name] = CompanyEntry(*ips_info[host], sessions=[])

            res[company_name].sessions.append(drupal_session)
            res[company_name].session_length = timedelta(
                seconds=session_length)

        drupal_session = DrupalSession()
        session_length = 0

        session.commit()
    logger.info('Sessions extraction has been finished successfully.')
    return res
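
Access and ip_digits are referenced above but not defined in this excerpt. A hypothetical, self-contained sketch of what they might be; the real definitions may differ:

from collections import namedtuple
import re

# Hypothetical stand-ins (the real ones are not shown in the source).
Access = namedtuple('Access', ['timestamp', 'hostname', 'path', 'url', 'title'])


def ip_digits(hostname):
    # Normalize a hostname to its dotted-quad IP when one is present,
    # so requests from the same address compare equal.
    match = re.search(r'\d{1,3}(?:\.\d{1,3}){3}', hostname or '')
    return match.group(0) if match else hostname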