Code example #1
def update_squirrel_rating():
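    # Backfill XingCompanyDb.xing_url from Company.xing_page when no URL is stored yet.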
    companies_names_xing = ['thuega meteringservice gmbh']
    for name in companies_names_xing:
        pprint(name)
        query_x_url = session.query(XingCompanyDb.company_name_x,
                                    XingCompanyDb.xing_url).filter(
                                        XingCompanyDb.company_name_x == name, )
        try:
            xing_url = query_x_url[0][1]
        except IndexError:
            xing_url = u''
        pprint(xing_url)
        if xing_url != u'':
            query = session.query(XingCompanyDb).filter(
                XingCompanyDb.company_name_x == name, )
            pprint("zazaz")
        else:
            query_x_p = session.query(
                Company.xing_page).filter(Company.name == name)
            xing_page = query_x_p[0][0]
            pprint(xing_page)
            query = session.query(XingCompanyDb).filter(
                XingCompanyDb.company_name_x == name, )
            query.update({XingCompanyDb.xing_url: xing_page},
                         synchronize_session="fetch")
            session.commit()
Code example #2
File: queries.py Project: alexpinkevichwork/squirrel
def get_companies_for_xing(companies, force_update):
    """
    Prepares a list of company names to search for on XING.
    :param companies: List of companies that made requests during the specified range
    :param force_update: force update of company info in the database from the spiders
    """
    existing_entries = session.query(XingCompanyDb).join(
        Company, Company.id == XingCompanyDb.xc_id).filter(
            Company.name.in_(companies),
            Company.xing_page != 'NA',
            Company.xing_page.isnot(None),
        )

    existing_objects_by_name = set(
        session.query(XingCompanyDb).filter(
            XingCompanyDb.company_name_x.in_(companies)))
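    # Drop XING records whose matching Company no longer has a usable xing_page.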
    to_delete_ids = {
        c.x_id
        for c in existing_objects_by_name - set(existing_entries)
    }
    if to_delete_ids:
        session.query(XingCompanyDb).filter(
            XingCompanyDb.x_id.in_(to_delete_ids)).delete(
                synchronize_session='fetch')
        session.commit()

    existing_names = {
        entry.company_name_x.lower()
        for entry in existing_entries
    }
    res = set(companies) - existing_names

    if force_update:
        res.update({u'update_' + name for name in existing_names})
    return res
Code example #3
    def recalculate_per(self, timestamp=settings.TWO_WEEKS_AGO):
        self._load_accesslogs_timestamps_to_memory(timestamp=timestamp)
        update_list = []
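        # Accumulate per-IP totals and flush them to the database in batches of 5000 rows.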
        for index, item in enumerate(
                self.get_db_ip(hosts=self.accesslogs_timestamps.keys())):
            timestamps = self.filter_accesslogs_timestamp(item)
            session_total = self.session_total_by_host(timestamps)

            update_dict = {
                'ip_id':
                item.ip_id,
                'total_session_length': (item.total_session_length or 0) +
                session_total.get('time', 0),
                'total_visit_count': (item.total_visit_count or 0) +
                session_total.get('visited', 0),
                'last_total_update':
                session_total.get('last_timestamp', time.time()),
            }
            update_list.append(update_dict)

            if index and index % 5000 == 0:
                session.bulk_update_mappings(DbIpDatabase, update_list)
                session.commit()
                update_list = []
                logger.info('Updated %s records.' % str(index))

        session.bulk_update_mappings(DbIpDatabase, update_list)
        session.commit()
        logger.info('Updated %s records.' % str(index))
        self._log_update({'total_fields_last_calculation': time.time()})
Code example #4
def main():
    """
        Looks up the latest timestamp stored in squirrel and imports all newer
        data from the Drupal accesslog. Make sure you are not connected to the
        T-Mobile stick, or the database connection to Drupal will fail.
    """
    logger.info("Start synchronize accesslogs.")
    start_time = time.time()

    logger.info("Get max current timestamp.")
    local_accesslog = session.query(func.max(Accesslog.timestamp)).first()
    local_accesslog = local_accesslog[0] if local_accesslog else None
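    # Nothing to import if the local accesslog table has no reference timestamp yet.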
    if not local_accesslog:
        return
    logger.info("Get all new accesslogs.")
    drupal_accesslogs = drupal_session.query(Accesslog).filter(
        Accesslog.timestamp > local_accesslog)

    logger.info("Build bulk insert query.")
    session.bulk_insert_mappings(Accesslog, [
        dict(aid=i.aid,
             sid=i.sid,
             title=i.title,
             path=i.path,
             url=i.url,
             hostname=i.hostname,
             uid=i.uid,
             timer=i.timer,
             timestamp=i.timestamp) for i in drupal_accesslogs
    ])
    session.commit()
    logger.info("Data loaded in %s seconds. Count: %s" %
                (str(time.time() - start_time), drupal_accesslogs.count()))
Code example #5
    def process_item(self, item, spider):
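        # Update the company's Wikipedia record with the scraped fields and adjust its manual_entry flag.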
        logging.info("!!!!!!!!!!ITEM!!!!!!!!!!!!")
        logging.info(item)
        company_name = item['company_name']
        company_website = item['wiki_company_website']
        headquarters = item.get('sitz', '')[:50] if item.get('sitz') else None
        if item.get('wiki_company_website') and len(
                item['wiki_company_website']) > 130:
            parsed_url = urlparse.urlparse(item['wiki_company_website'])
            item['wiki_company_website'] = '{protocol}://{hostname}'.format(
                protocol=parsed_url.scheme, hostname=parsed_url.hostname)

        item = dict(summary_wikipedia_w=item['summary'],
                    categories_wikipedia_w=item['categories'],
                    revenue_wikipedia_w=item.get('revenue', ''),
                    revenue_currency_wiki_w=item.get('currency', ''),
                    branch_wikipedia_w=item.get('branche', ''),
                    wiki_url_w=item['company_website'],
                    headquarters_wiki_w=headquarters,
                    employees_wikipedia_w=item.get('mitarbeiter', ''),
                    company_website_w=item.get('wiki_company_website', ''),
                    last_update_w=func.now())
        query = session.query(WikipediaDb).filter(
            WikipediaDb.company_name_w == company_name, )

        # wiki_company.update(item, synchronize_session='fetch')
        query.update(item, synchronize_session=False)
        if query[0].manual_entry == "old":
            query.update({WikipediaDb.manual_entry: "No"},
                         synchronize_session="fetch")
        else:
            query.update({WikipediaDb.manual_entry: "manual"},
                         synchronize_session="fetch")
        session.commit()
Code example #6
 def mass_evaluation(self):
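     # Flip companies flagged manual_entry == "Yes" to "manual", schedule the Google spider for them, and resync resources once it finishes.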
     project_name = 'default'
     scrapyd_data = {'project': project_name}
     force_update = True
     query = session.query(Company.name).filter(
         Company.manual_entry == "Yes", )
     query.update({Company.manual_entry: "manual"},
                  synchronize_session="fetch")
     session.commit()
     companies = []
     for name in query:
         name = u'update_{}'.format(name[0].lower())
         companies.append(name)
     #companies = q.get_companies_for_google_search(companies_names, force_update)
     #companies = SPLITTER.join(companies)
     logger.debug(companies)
     scrapyd_data.update(spider=GOOGLE_NAME, companies=companies)
     requests.post(SCRAPYD_SCHEDULE_URL, scrapyd_data)
     while True:
         resp = get_scrapyd_jobs(project_name)
         if len(resp['pending']) or len(resp['running']):
             logger.debug('{} spider still working'.format("goggle"))
             time.sleep(5)
         else:
             time.sleep(10)
             break
     logger.info('Updating resources...')
     from mx_crm.synchronizers.resource_sync import ResourceSync
     RS = ResourceSync()
     RS.sync_all()
Code example #7
File: queries.py Project: alexpinkevichwork/squirrel
def fixing_wrong_old_wiki(name):
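    # Reset the manual_entry flag to "No" and refresh last_update_w on the company's Wikipedia record.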
    query = session.query(WikipediaDb).filter(
        WikipediaDb.company_name_w == name, )
    query.update({WikipediaDb.manual_entry: "No"}, synchronize_session="fetch")
    query.update({WikipediaDb.last_update_w: func.now()},
                 synchronize_session="fetch")
    session.commit()
Code example #8
 def xing_update(self, xing_names_urls):
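     # Mark each XING record as manually entered; when no usable URL is stored, backfill it from Company.xing_page.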
     project_name = 'default'
     scrapyd_data = {'project': project_name}
     xing_login = '******'
     xing_password = '******'
     for name, url in xing_names_urls.iteritems():
         if url not in (u'', u'N/A'):
             #scrapyd_data.update(spider=XING_MANUAL_NAME, companies=name, urls=url,
             #                    login=xing_login, password=xing_password)
             #requests.post(SCRAPYD_SCHEDULE_URL, scrapyd_data)
             query = session.query(XingCompanyDb).filter(
                 XingCompanyDb.company_name_x == name, )
             query.update({XingCompanyDb.manual_entry: "Yes"},
                          synchronize_session="fetch")
             session.commit()
         else:
             query_x_p = session.query(
                 Company.xing_page).filter(Company.name == name)
             xing_page = query_x_p[0][0]
             query = session.query(XingCompanyDb).filter(
                 XingCompanyDb.company_name_x == name, )
             query.update({XingCompanyDb.manual_entry: "Yes"},
                          synchronize_session="fetch")
             query.update({XingCompanyDb.xing_url: xing_page},
                          synchronize_session="fetch")
             session.commit()
             logger.info("PROBLEMS !!!!")
             logger.info(name)
             logger.info(name)
             logger.info(name)
Code example #9
    def recalculate(self, companies_names=[]):
        self._load_accesslogs_timestamps_to_memory()
        update_list = []
        for index, item in enumerate(
                self.get_db_ip(companies_names=companies_names)):
            timestamps = self.accesslogs_timestamps.get(
                re.sub('\d+$', '0', item.ip_ip), [])
            session_total = self.session_total_by_host(timestamps)

            update_dict = {
                'ip_id':
                item.ip_id,
                'total_session_length':
                session_total.get('time', 0),
                'total_visit_count':
                session_total.get('visited', 0),
                'last_total_update':
                session_total.get('last_timestamp', time.time()),
            }
            update_list.append(update_dict)

            if index and index % 5000 == 0:
                session.bulk_update_mappings(DbIpDatabase, update_list)
                session.commit()
                update_list = []
                logger.info('Updated %s records.' % str(index))

        session.bulk_update_mappings(DbIpDatabase, update_list)
        session.commit()
        logger.info('Updated %s records.' % str(index))
        self._log_update({'total_fields_last_full_calculation': time.time()})
Code example #10
 def update_squirrel_rating(self, companies_names=[]):
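     # Recalculate the rating components for each company and write them back to the Company table.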
     names = []
     websites = []
     for name in companies_names:
         query = session.query(Company.website).filter(Company.name == name)
         websites.append(query[0][0])
     rating_parts = SquirrelRating().calc(companies_names, websites, True)
     for name in rating_parts.keys():
         names.append(name)
     for name in names:
         rating_update_info = dict(
             mx_crm_location_level=rating_parts.get(name).get('location'),
             mx_crm_branch_level=rating_parts.get(name).get('branch'),
             mx_crm_google_evaluation=rating_parts.get(name).get(
                 'google_ev'),
             mx_crm_wiki_rating_points=rating_parts.get(name).get(
                 'wiki_size'),
             mx_crm_xing_rating_points=rating_parts.get(name).get(
                 'xing_size'),
             mx_crm_revenue_level=rating_parts.get(name).get(
                 'revenue_point'),
             squirrel_rating=rating_parts.get(name).get('score'))
         query = session.query(Company).filter(Company.name == name)
         query.update(rating_update_info, synchronize_session=False)
         session.commit()
Code example #11
    def _process_google_item(self, item, spider):
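        # Create the Company row if it does not exist yet; otherwise refresh its website fields according to the current manual_entry state.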
        from sqlalchemy.exc import IntegrityError
        try:
            q = session.query(Company).filter(
                Company.name == item['company_name'])
        except IntegrityError:
            q = session.query(Company).filter(
                Company.name == item['company_name']).first()
        logging.info(
            "IIIIITTTTTTTTTTTTTEEEEEEEEEEEEMMMMMMMMMMMM@@@@@@@@@@@@@@@@@@@@")
        logging.info(item)
        if q.count() and item['update']:
            c = q.first()
            website = 'NA'
            if c.website:
                website = c.website
            elif c.website_long:
                website = urlparse.urlsplit(c.website_long)[1]
            if c.manual_entry == 'Yes':
                q.update({
                    'website': item['url'],
                    'website_long': item['url_long'],
                    'website_updated': datetime.now(),
                    'website_old': website,
                    'last_update': datetime.now(),
                    'manual_entry': 'manual',
                })
                logging.info("MANUAL")
                logging.info("MANUAL")
                logging.info("MANUAL")
                logging.info("MANUAL")

            elif c.manual_entry == 'old':
                q.update({
                    'website': item['url'],
                    'website_long': item['url_long'],
                    'website_updated': datetime.now(),
                    'website_old': website,
                    'last_update': datetime.now(),
                    'manual_entry': 'No'
                })
                session.commit()

            else:
                dn = datetime.now()
                update_item = {
                    'website': item['url'],
                    'website_long': item['url_long'],
                    'website_updated': datetime.now(),
                    'website_old': website,
                    'last_update': dn
                }
                logging.info(update_item)
                q.update(update_item)
        elif not q.count():
            new_company = Company(name=item['company_name'],
                                  website=item['url'],
                                  website_long=item['url_long'])
            session.add(new_company)
Code example #12
File: queries.py Project: alexpinkevichwork/squirrel
def update_db_hosts():
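    # Rewrite every stored IP with its last octet zeroed (255.255.255.255 -> 255.255.255.0).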
    ips = session.query(DbIpDatabase)
    logger.info(
        'Starting update IPs ({}) from 255.255.255.255 to 255.255.255.0'.
        format(ips.count()))
    for ip in ips:
        ip.ip_ip = ip_digits(ip.ip_ip)
    session.commit()
Code example #13
 def _log_update(self, log):
     calc_log = session.query(CalculationsTime).first()
     if not calc_log:
         calc_log = CalculationsTime(**log)
         session.add(calc_log)
     else:
         session.query(CalculationsTime).update(log)
     session.commit()
Code example #14
File: queries.py Project: alexpinkevichwork/squirrel
def fixing_wrong_old(name):
    query = session.query(XingCompanyDb).filter(
        XingCompanyDb.company_name_x == name, )
    query.update({XingCompanyDb.manual_entry: "No"},
                 synchronize_session="fetch")
    query.update({XingCompanyDb.last_update_x: func.now()},
                 synchronize_session="fetch")
    session.commit()
Code example #15
    def mass_update(self, company_name, xing_login, xing_password,
                    new_xing_url):
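        # Persist the new XING URL, then schedule the XING spider and wait for it to finish before resyncing XING resources.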
        xing_url = new_xing_url
        f = open("mx_crm/manual_queries/xing_url.txt", "w")
        f.write(xing_url)
        f.close()
        print('*' * 50)
        print('Start updating xing info for company {}'.format(company_name))
        query = session.query(XingCompanyDb).filter(
            XingCompanyDb.company_name_x == company_name, )
        query.update({XingCompanyDb.manual_entry: "ololo"},
                     synchronize_session="fetch")
        query.update({XingCompanyDb.xing_url: new_xing_url},
                     synchronize_session="fetch")
        session.commit()
        print('*' * 50)

        project_name = 'default'
        scrapyd_data = {'project': project_name}
        decode_company_name = u'{}'.format(company_name.decode('utf-8'))
        print decode_company_name
        company_name_lower = u'update_{}'.format(
            decode_company_name[0].lower())
        update_company_name = company_name_lower + decode_company_name[1:]
        print(update_company_name)

        companies_names = []
        force_update = True
        companies_names.append(decode_company_name.lower())

        print('Start parsing given xing url {}'.format(xing_url))
        companies = q.get_companies_for_xing(companies_names, force_update)
        companies = SPLITTER.join(companies)
        scrapyd_data.update(spider=XING_NAME,
                            companies=companies,
                            login=xing_login,
                            password=xing_password)
        requests.post(SCRAPYD_SCHEDULE_URL, scrapyd_data)
        while True:
            from mx_crm.utils import get_scrapyd_jobs
            resp = get_scrapyd_jobs(project_name)
            if not len(resp['finished']):
                time.sleep(3)
            else:
                break
        requests.post(SCRAPYD_SCHEDULE_URL, scrapyd_data)
        while True:
            from mx_crm.utils import get_scrapyd_jobs
            resp = get_scrapyd_jobs(project_name)
            if not len(resp['finished']):
                time.sleep(3)
            else:
                break
        logger.info('Updating resources...')
        from mx_crm.synchronizers.resource_sync import ResourceSync
        RS = ResourceSync()
        RS.xing_sync()
Code example #16
 def log_start(self, type, description='', additional_data=''):
     le = LogExecutions(
         type=type,
         description=description,
         start_datetime=datetime.now(),
         additional_data=additional_data)
     session.add(le)
     session.commit()
     self.current_session = le
Code example #17
File: queries.py Project: alexpinkevichwork/squirrel
def set_wikipedia_manual_entry_manual(companies=[]):
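    # Flag each listed company's Wikipedia record as manually entered, skipping companies that raise an error.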
    for i in companies:
        try:
            query = session.query(WikipediaDb).filter(
                WikipediaDb.company_name_w == i, )
            query.update({WikipediaDb.manual_entry: "manual"},
                         synchronize_session="fetch")
            session.commit()
        except:
            continue
Code example #18
    def update_google_url(self, company_name, google_url):
        print('*' * 50)
        print('Start updating google website for company {}'.format(
            company_name))
        query = session.query(Company).filter(Company.name == company_name, )

        query.update({Company.manual_entry: "yes"},
                     synchronize_session="fetch")
        query.update({Company.website: google_url},
                     synchronize_session="fetch")
        session.commit()
        print('*' * 50)
Code example #19
    def update_wikipedia_url(company_name, wikipedia_url):
        print('*' * 50)
        print(
            'Start updating wikipedia url for company {}'.format(company_name))
        print('New url is {}'.format(wikipedia_url))
        query = session.query(WikipediaDb).filter(
            WikipediaDb.company_name_w == company_name, )
        query.update({WikipediaDb.wiki_url_w: wikipedia_url},
                     synchronize_session="fetch")
        query.update({WikipediaDb.manual_entry: "Yes"},
                     synchronize_session="fetch")
        session.commit()
        print(
            'New wikipedia url ({0}) for company {1} has been successfully updated'.
            format(wikipedia_url, company_name))
        print('*' * 50)
        print('Start parsing page {}'.format(wikipedia_url))
        print('*' * 50)

        companies_dict = {company_name: wikipedia_url}

        print companies_dict

        project_name = 'default'
        scrapyd_data = {'project': project_name}
        decode_company_name = u'{}'.format(company_name.decode('utf-8'))
        print decode_company_name
        company_name_lower = u'update_{}'.format(
            decode_company_name[0].lower())
        update_company_name = company_name_lower + decode_company_name[1:]
        print(update_company_name)
        scrapyd_data.update(spider=WIKIPEDIA_NAME,
                            companies=update_company_name,
                            urls=wikipedia_url)
        requests.post(SCRAPYD_SCHEDULE_URL, scrapyd_data)

        while True:
            from mx_crm.utils import get_scrapyd_jobs
            resp = get_scrapyd_jobs(project_name)
            print(resp)
            if len(resp['finished']) >= 1:
                break
            time.sleep(5)

        logger.info('Updating resources...')
        from mx_crm.synchronizers.resource_sync import ResourceSync
        RS = ResourceSync()
        RS.wiki_sync()
Code example #20
    def _get_branch_level(self, branch_level_xing, branch_level_wiki, company):
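        # Combine the XING and Wikipedia branch levels into a single score (averaged when both exist) and persist the raw per-source values on the Company row.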
        if not branch_level_xing and not branch_level_wiki:
            return 0
        # xing_branch = None
        # wiki_branch = None
        total_branch = 0
        if branch_level_xing:
            xing_branch = branch_level_xing.get(company.lower())
            if xing_branch == -20:
                xing_branch = BranchEvaluationLevel().protection_calc_xing(company)
            if company == 'Rittal GmbH & Co. KG':
                xing_branch = 20
            if company == 'Washtec Cleaning Technology GmbH':
                xing_branch = BranchEvaluationLevel().protection_calc_xing('Washtec Cleaning Technology GmbH')
        else:
            xing_branch = None
        if branch_level_wiki:
            wiki_branch = branch_level_wiki.get(company.lower())
            if wiki_branch == -20:
                wiki_branch = BranchEvaluationLevel().protection_calc_wiki(company)
            if company == 'Washtec Cleaning Technology GmbH':
                wiki_branch = BranchEvaluationLevel().protection_calc_wiki('Washtec Cleaning Technology GmbH')
        else:
            wiki_branch = None
        if xing_branch is None:
            total_branch = wiki_branch
        if wiki_branch is None:
            total_branch = xing_branch
        if not xing_branch and not wiki_branch:
            total_branch = 0
        if xing_branch and wiki_branch:
            total_branch = (xing_branch + wiki_branch) / 2

        xing_b_for_save = branch_level_xing.get(company.lower())
        wiki_b_for_save = branch_level_wiki.get(company.lower())

        if xing_b_for_save is None:
            xing_b_for_save = 0

        if wiki_b_for_save is None:
            wiki_b_for_save = 0

        query = session.query(Company).filter(Company.name == company)
        query.update({Company.mx_crm_wiki_branch: wiki_b_for_save}, synchronize_session="fetch")
        query.update({Company.mx_crm_xing_branch: xing_b_for_save}, synchronize_session="fetch")
        session.commit()

        return total_branch
Code example #21
    def update_wiki_company(self, company_name, wikipedia_url):
        company_name_for_file = u'{}'.format(company_name.decode('utf-8'))
        company_name = [company_name.lower()]
        wiki_url = wikipedia_url
        f = open("mx_crm/manual_queries/wiki_url.txt", "w")
        f.write(wiki_url.encode("utf-8"))
        f.close()
        f = io.open("mx_crm/manual_queries/wiki_company_name.txt",
                    "w",
                    encoding="utf-8")
        f.write(company_name_for_file)
        f.close()

        print('*' * 50)
        print('Start updating wikipedia info for company {}'.format(
            company_name[0]))
        query = session.query(WikipediaDb).filter(
            WikipediaDb.company_name_w == company_name[0], )
        query.update({WikipediaDb.manual_entry: "manual"},
                     synchronize_session="fetch")
        session.commit()
        print('*' * 50)
        print('Start parsing given wiki url {}'.format(wiki_url))
        print('*' * 50)
        project_name = 'default'
        scrapyd_data = {'project': project_name}
        companies_dict = q.get_companies_for_wikipedia(company_name, True)
        companies = companies_dict.iterkeys()
        companies = SPLITTER.join(companies)
        urls = companies_dict.values()
        urls = SPLITTER.join(urls)
        scrapyd_data.update(spider=WIKIPEDIA_NAME,
                            companies=companies,
                            urls=urls)
        requests.post(SCRAPYD_SCHEDULE_URL, scrapyd_data)
        while True:
            resp = get_scrapyd_jobs(project_name)
            if len(resp['pending']) or len(resp['running']):
                logger.debug('{} spider still working'.format("wikipedia"))
                time.sleep(5)
            else:
                time.sleep(10)
                break
        logger.info('Updating resources...')
        from mx_crm.synchronizers.resource_sync import ResourceSync
        RS = ResourceSync()
        RS.wiki_sync()
Code example #22
File: queries.py Project: alexpinkevichwork/squirrel
def get_old_google_companies():
    count = 0
    date_now = datetime.now()
    last_date = date_now - timedelta(days=3 * 365)
    last_date = last_date.strftime('%Y-%m-%d')
    old_companies = session.query(Company).filter(
        Company.last_update <= last_date)
    old_names = []
    for c in old_companies:
        old_names.append(c.name)
        count += 1
    pprint(count)
    for name in old_names[:40]:
        query_w_url = session.query(Company.name, Company.website).filter(
            Company.name == name, )
        try:
            website = query_w_url[0][1]
        except IndexError:
            website = u''
        pprint(website)
        if website in (u'', u'NA', u'N/A') or website is None:
            query = session.query(Company).filter(Company.name == name, )
            query.update({Company.last_update: func.now()},
                         synchronize_session="fetch")
            session.commit()
        else:
            query = session.query(Company).filter(Company.name == name, )
            query.update({Company.manual_entry: "old"},
                         synchronize_session="fetch")
            session.commit()
Code example #23
    def parse(self, response):
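        # On a 404, reset the manual_entry flag of records marked "old"; then scrape the infobox, categories and summary from the page body.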
        if response.status == 404:
            logger.info("ULALAL")
            query = session.query(WikipediaDb).filter(
                WikipediaDb.company_name_w == response.meta['company_name'],
            )
            if query[0].manual_entry == 'old':
                query.update({WikipediaDb.manual_entry: "No"}, synchronize_session="fetch")
                from sqlalchemy import func
                query.update({WikipediaDb.last_update_w: func.now()}, synchronize_session="fetch")
                session.commit()

        wiki_url = response.url
        company_name = response.meta['company_name']
        infobox_content = self._get_infobox_content(u'{}'.format(response.body.decode("utf-8")))
        category_content = self._get_category_content(u'{}'.format(response.body.decode("utf-8")))
        summary_content = self._get_summary_content(u'{}'.format(response.body.decode("utf-8")))
        logger.info("INFOBOX SYKA!!!")
        logger.info(infobox_content)
        try:
            website = infobox_content['wiki_company_website']
            sitz = infobox_content.get('sitz')
            mitarbeiter = infobox_content.get('mitarbeiter')
            branche = infobox_content.get('branche')
            revenue = infobox_content.get('revenue')
            currency = infobox_content.get('currency')
        except:
            website = ''
            sitz = ''
            mitarbeiter = ''
            branche = ''
            revenue = ''
            currency = ''
        yield WikipediaSpiderItem(wiki_company_website=website,
                                  company_website=wiki_url,
                                  summary=summary_content,
                                  categories=category_content,
                                  sitz=sitz,
                                  mitarbeiter=mitarbeiter,
                                  branche=branche,
                                  revenue=revenue,
                                  currency=currency,
                                  company_name=company_name)
Code example #24
File: queries.py Project: alexpinkevichwork/squirrel
def get_companies_for_google_search(companies, force_update):
    """
    Extracts company info from the companies table, finds duplicate entries for
    those companies and deletes them, and prepares a list of companies for the
    Google search.
    :param companies: List of companies that made requests during the specified range
    :param force_update: force update of company info in the database from the spiders
    :return: List of companies to run the Google search for.
    """
    names = session.query(Company.name).filter(
        Company.name.in_(companies) & (Company.website != None)
        & (Company.website != 'NA'))
    names = {name[0].lower() for name in names}

    existing_names = session.query(Company.name).filter(
        Company.name.in_(companies)
        & ((Company.website == None) | (Company.website == 'NA')))
    existing_names = {name[0].lower() for name in existing_names}

    to_delete = names & existing_names
    session.query(Company).filter(
        Company.name.in_(to_delete)
        & ((Company.website == None) | (Company.website == 'NA'))).delete(
            synchronize_session='fetch')
    session.commit()

    existing_names -= to_delete

    companies = set(companies)
    if force_update:
        names.update(existing_names)
        companies.update(names)
        companies = map(lambda c: u'update_{}'.format(c), companies)
    else:
        companies = companies - names - existing_names
        companies.update(
            {u'update_{}'.format(name)
             for name in existing_names})
    return companies
Code example #25
File: queries.py Project: alexpinkevichwork/squirrel
def get_bad_revenue_wikipedia():
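    # Find Wikipedia rows that record a currency but no revenue figure and mark the first 170 of them as "old".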
    query = session.query(WikipediaDb).filter(
        WikipediaDb.revenue_currency_wiki_w != '').filter(
            WikipediaDb.revenue_wikipedia_w == '').filter(
                WikipediaDb.manual_entry != 'confirm').filter(
                    WikipediaDb.manual_entry != 'Confirm')
    # query = session.query(WikipediaDb).filter(
    #     WikipediaDb.revenue_currency_wiki_w != '' and WikipediaDb.revenue_wikipedia_w == ''
    # )
    count = 0
    update_list = []

    for i in range(0, 170):
        print(i)
        query_u = session.query(WikipediaDb).filter(
            WikipediaDb.company_name_w == query[i].company_name_w, )
        query_u.update({WikipediaDb.manual_entry: "old"},
                       synchronize_session="fetch")
        session.commit()

    for i in query:
        count += 1
    print(count)
    print(query)
Code example #26
File: queries.py Project: alexpinkevichwork/squirrel
def get_drupal_sessions(start_time, end_time):
    """
    Extracts request sessions from the accesslog table.
    :param start_time: time to extract requests from
    :param end_time: time to extract requests to
    :return: Dictionary with session info grouped by company.
    """
    logger.info("Started sessions extraction")

    timestamp_start_time = (start_time - datetime(1970, 1, 1)).total_seconds()
    timestamp_end_time = (end_time - datetime(1970, 1, 1)).total_seconds()

    readable_s = datetime.fromtimestamp(timestamp_start_time)
    readable_e = datetime.fromtimestamp(timestamp_end_time)
    access_hosts = session.query(
        Accesslog.timestamp, Accesslog.hostname, Accesslog.path, Accesslog.url,
        Accesslog.title
    ).filter(
        # between(Accesslog.timestamp, timestamp_start_time, timestamp_end_time),
        between(Accesslog.timestamp, func.unix_timestamp(start_time),
                func.unix_timestamp(end_time)),
        Accesslog.title != 'Generate image style',
        Accesslog.hostname.notin_(settings.IPS_BLACKLIST)).order_by(
            Accesslog.hostname, Accesslog.timestamp)
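    # Materialize the raw rows as Access records, already ordered by hostname and timestamp.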
    accesslog = [Access(*res) for res in access_hosts]

    blacklist = {
        tup[0].lower()
        for tup in session.query(Company.name).filter(
            Company.type_main.in_(['Blacklist', 'Spam', 'Provider']))
    }

    ips_info = {
        tup[0]: tup[1:]
        for tup in session.query(DbIpDatabase.ip_ip, DbIpDatabase.ip_country,
                                 DbIpDatabase.ip_name, DbIpDatabase.ip_name_2,
                                 DbIpDatabase.ip_address)
    }

    res = {}
    drupal_session = DrupalSession()
    session_length = 0
    len_accesslog = len(accesslog[:-1]) - 1
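    # Walk the requests pairwise: keep extending the current session while the host stays the same, then record it for the resolved company.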
    for index, request in enumerate(accesslog[:-1]):
        host = ip_digits(request.hostname)
        access_datetime = datetime.fromtimestamp(int(request.timestamp))

        next_request = accesslog[index + 1]
        next_request_host = ip_digits(next_request.hostname)
        next_request_access_datetime = datetime.fromtimestamp(
            int(next_request.timestamp))

        difference = next_request_access_datetime - access_datetime

        is_continue = False
        if host == next_request_host and difference.seconds < settings.MAXIMUM_DIFFERENCE_BETWEEN_SESSIONS.seconds:
            session_length += difference.seconds
            is_continue = True
        elif host == next_request_host:
            session_length += settings.LONG_SESSION_DEFAULT
            is_continue = True
        elif host != next_request_host:
            session_length += settings.LONG_SESSION_DEFAULT

        if index and host == ip_digits(
                accesslog[index - 1].hostname) and host != next_request_host:
            drupal_session.append(request)
        elif host == next_request_host:
            drupal_session.append(request)
            is_continue = True

        if is_continue and index != len_accesslog:
            continue

        if host in ips_info:
            country, company_name, address_result, full_address_result = ips_info[
                host]
        else:
            country = company_name = address_result = full_address_result = ''
            try:
                country, company_name, address_result, full_address_result = get_whois(
                    host)
            except Exception as e:
                logger.error(
                    'get_whois function (RIPE) got an error for host: {}\nError: {}'
                    .format(host, str(e)))
                continue
            finally:
                address_result = address_result[:250]
                logger.debug(address_result)
                full_address_result = full_address_result[:350]

                new_entry = DbIpDatabase(ip_ip=host,
                                         ip_country=country,
                                         ip_name=company_name,
                                         ip_name_2=address_result,
                                         ip_address=full_address_result,
                                         ip_host=host,
                                         ip_timestamp=func.now())
                session.add(new_entry)

                ips_info[host] = (country, company_name, address_result,
                                  full_address_result)

        company_name = company_name.lower()

        if company_name and country in settings.RELEVANT_COUNTRIES \
                and company_name not in settings.PROVIDERS_BLACKLIST \
                and company_name not in blacklist \
                and not any(word in company_name for word in settings.COMPANIES_BLACKLIST) \
                and not any(re.search(regexp, company_name) for regexp in settings.PROVIDERS_BLACKLIST_REGEXPS) \
                and not any(re.search(regexp, company_name) for regexp in settings.COMPANIES_BLACKLIST_REGEXPS):

            if company_name not in res:
                res[company_name] = CompanyEntry(*ips_info[host], sessions=[])

            res[company_name].sessions.append(drupal_session)
            res[company_name].session_length = timedelta(
                seconds=session_length)

        drupal_session = DrupalSession()
        session_length = 0

        session.commit()
    logger.info('Sessions extraction has been finished successfully.')
    return res
Code example #27
    def update(self, days, **kwargs):
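        # Find companies whose Wikipedia, XING or Google data is more than a year old and flag the stale records as "old" so the spiders refresh them.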
        import datetime
        force_update = True
        date_now = datetime.datetime.now()
        start_date, end_date = prepare_date_to_drupal_execute(days, **kwargs)
        drupal_companies = q.get_drupal_sessions(end_date, start_date)
        companies_names = drupal_companies.keys()
        companies_names = map(lambda c: c.lower(), companies_names)
        logger.debug('Found companies: {}'.format(companies_names))
        logger.debug('Count of founded companies: {}'.format(
            len(companies_names)))
        companies_wiki = {}
        companies_xing = {}
        companies_google = {}
        companies_next_list = []
        finish_companies_list = []
        #imported_companies = OneYearUpdate().import_companies_update()

        for company in drupal_companies.keys():
            companies_next_list.append(company)
        finish_companies_list = companies_next_list
        #finish_companies_list = companies_next_list + imported_companies
        for company in finish_companies_list:
            try:
                query_w = session.query(WikipediaDb.last_update_w).filter(
                    WikipediaDb.company_name_w == company)
                try:
                    if query_w[0][0]:
                        date_diff_w = date_now - query_w[0][0]
                        if date_diff_w.days > 365:
                            companies_wiki[company] = drupal_companies[company]
                except IndexError:
                    continue
            except KeyError:
                continue

        for company in finish_companies_list:
            try:
                query_x = session.query(XingCompanyDb.last_update_x).filter(
                    XingCompanyDb.company_name_x == company)
                try:
                    if query_x[0][0]:
                        date_diff_x = date_now - query_x[0][0]
                        if date_diff_x.days > 365:
                            companies_xing[company] = drupal_companies[company]
                except IndexError:
                    continue
            except KeyError:
                continue

        for company in finish_companies_list:
            try:
                query_g = session.query(
                    Company.last_update).filter(Company.name == company)
                try:
                    if query_g[0][0]:
                        date_diff_g = date_now - query_g[0][0]
                        if date_diff_g.days > 365:
                            companies_google[company] = drupal_companies[
                                company]
                except IndexError:
                    continue
            except KeyError:
                continue

        companies_names_wiki = companies_wiki.keys()
        companies_names_wiki = map(lambda c: c.lower(), companies_names_wiki)
        companies_names_xing = companies_xing.keys()
        companies_names_xing = map(lambda c: c.lower(), companies_names_xing)
        companies_names_google = companies_google.keys()
        companies_names_google = map(lambda c: c.lower(),
                                     companies_names_google)

        logger.debug('Companies to update for wikipedia: {}'.format(
            companies_names_wiki))
        logger.debug('Count of companies to update for wikipedia: {}'.format(
            len(companies_names_wiki)))
        logger.debug(
            'Companies to update for xing: {}'.format(companies_names_xing))
        logger.debug('Count of companies to update for xing: {}'.format(
            len(companies_names_xing)))
        logger.debug('Companies to update google evaluation: {}'.format(
            companies_names_google))
        logger.debug(
            'Count of companies to update google evaluation: {}'.format(
                len(companies_names_google)))

        for name in companies_names_wiki:
            pprint(name)
            query_w_url = session.query(
                WikipediaDb.company_name_w, WikipediaDb.wiki_url_w).filter(
                    WikipediaDb.company_name_w == name, )
            try:
                wiki_url = query_w_url[0][1]
            except IndexError:
                wiki_url = u''
            pprint(wiki_url)
            if wiki_url not in (u'', u'NA', u'N/A'):
                query = session.query(WikipediaDb).filter(
                    WikipediaDb.company_name_w == name, )
                query.update({WikipediaDb.manual_entry: "old"},
                             synchronize_session="fetch")
                session.commit()
            else:
                # No usable URL stored: also copy Company.wikipedia_url onto the record.
                query_w_u = session.query(
                    Company.wikipedia_url).filter(Company.name == name)
                wiki_page = query_w_u[0][0]
                query = session.query(WikipediaDb).filter(
                    WikipediaDb.company_name_w == name, )
                query.update({WikipediaDb.manual_entry: "old"},
                             synchronize_session="fetch")
                query.update({WikipediaDb.wiki_url_w: wiki_page},
                             synchronize_session="fetch")
                session.commit()

        for name in companies_names_xing:
            pprint(name)
            query_x_url = session.query(
                XingCompanyDb.company_name_x, XingCompanyDb.xing_url).filter(
                    XingCompanyDb.company_name_x == name, )
            try:
                xing_url = query_x_url[0][1]
            except IndexError:
                xing_url = u''
            pprint(xing_url)
            if xing_url not in (u'', u'NA', u'N/A'):
                query = session.query(XingCompanyDb).filter(
                    XingCompanyDb.company_name_x == name, )
                query.update({XingCompanyDb.manual_entry: "old"},
                             synchronize_session="fetch")
                session.commit()
            else:
                # No usable URL stored: also copy Company.xing_page onto the record.
                query_x_p = session.query(
                    Company.xing_page).filter(Company.name == name)
                xing_page = query_x_p[0][0]
                query = session.query(XingCompanyDb).filter(
                    XingCompanyDb.company_name_x == name, )
                query.update({XingCompanyDb.manual_entry: "old"},
                             synchronize_session="fetch")
                query.update({XingCompanyDb.xing_url: xing_page},
                             synchronize_session="fetch")
                session.commit()

        for name in companies_names_google:
            pprint(name)
            query_g_url = session.query(Company).filter(Company.name == name, )
            query_g_url.update({Company.manual_entry: "old"},
                               synchronize_session="fetch")
            session.commit()
Code example #28
 def close_spider(self, spider):
     session.commit()
Code example #29
def create_report(companies, account_data=[], account_headers=[], total_fields=[], data_links={}, google_analytics_companies={},
                  dates={}):
    """
    Creates the report and saves it to a local Excel file.
    :param companies: List of companies that made requests during the specified range
    """
    logger.debug(companies)

    file_name = settings.REPORTS_FILE.format(now=datetime.datetime.now().strftime("%Y_%m_%d-%H_%M_%S"))
    path_to_xl = settings.rel('mx_crm', settings.REPORTS_FOLDER, file_name)
    logger.debug('Export excel file: {}'.format(path_to_xl))

    wb = Workbook()
    ws = wb.create_sheet('Report')

    logger.info('Saving report to the local excel file')
    wb_headers = settings.NEW_WORKBOOK_HEADERS
    # wb_headers = settings.WORKBOOK_HEADERS
    if account_headers:
        wb_headers += account_headers
    if total_fields:
        wb_headers += settings.TOTAL_HEADERS
    wb_headers += settings.RATING_HEADERS
    ws.append(wb_headers)

    companies_info = get_companies_info(companies)
    logger.info('companies_info')
    logger.info(companies_info)
    companies_info_manual_id = get_company_table_info(companies)
    logger.info('companies_info_manual_id')
    logger.info(companies_info_manual_id)
    # manual
    companies_info_websites = get_companies_info_websites(companies)
    logger.debug('Companies: {}'.format(len(companies_info)))

    companies_wiki_info = get_wiki_info(companies)
    logger.debug('Wiki companies: {}'.format(len(companies_wiki_info)))

    companies_xing_info = get_xing_info(companies)
    logger.debug('Xing companies: {}'.format(len(companies_xing_info)))

    companies_names = set()
    websites_for_rating = set()
    for c in companies_info.values():
        if c.website:
            websites_for_rating.add(c.website)
        if c.name:
            companies_names.add(c.name)
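    # Compute the squirrel rating and its per-source components once for all companies.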
    rating_data = SquirrelRating().calc(companies=companies_names, websites=websites_for_rating)
    company_manual_account = get_manual_account(companies_names)
    variables_data = SquirrelRating().get_rating_variables(companies, websites_for_rating)
    #logger.info("rating data {}".format(rating_data))
    #logger.info("rating data {}".format(type(rating_data)))

    try:
        counter = 0
        for company_name, company in sorted(companies.items(), key=lambda x: x[1].session_length, reverse=True):
            ws.row_dimensions[counter].collapsed = True
            address = company.full_address
            country = company.country
            # rating = rating_data.get(company.company_name).get('score')
            wiki_info = companies_wiki_info.get(company_name)
            xing_info = companies_xing_info.get(company_name)
            company_info = companies_info.get(company_name)
            company_table_manual_id = companies_info_manual_id.get(company_name)
            website = company_info.website if company_info else ''
            full_website = re.sub('www\d?\.', '', website).rstrip('/').lower()

            prepared_company_name = company_name
            xing_page = company_info.xing_page if company_info else None
            session_length = company.session_length

            for session in company.sessions:
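                # Each request becomes one report row and one MxCrmAccessHistory record.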
                for request in session.requests:
                    #master_company = alchemy_session.query(Company.name).filter(Company.name == company.company_name)
                    access_history = MxCrmAccessHistory(
                        company_name=company.company_name,
                        a_h_sid=counter,
                        mx_crm_visited_page=request.title,
                        mx_crm_referrer=request.url[:255],
                        mx_crm_session_date=datetime.datetime.fromtimestamp(int(request.timestamp)).strftime(
                            '%Y-%m-%d'),
                        mx_crm_session_time=datetime.datetime.fromtimestamp(int(request.timestamp)).strftime(
                            '%H:%M:%S'),
                        mx_crm_ip_vlan=request.hostname
                    )
                    alchemy_session.add(access_history)
                    alchemy_session.commit()
                    sheet_counter = 2
                    company_table_info = get_manual_website(company.company_name)
                    access_dt = datetime.datetime.fromtimestamp(request.timestamp).strftime('%Y-%m-%d %H:%M:%S')
                    rcd_name_rating = companies_info.get(company_name)
                    if rcd_name_rating and rcd_name_rating.name:
                        rating = rating_data.get(rcd_name_rating.name, 'N/C') if rating_data.get(
                            rcd_name_rating.name) is not None else 'N/C'

                    if company_name in total_fields:
                        obj = total_fields.get(company_name, {})
                        total_session_lenght = datetime.timedelta(seconds=obj.get('time') or 0)
                    # row = [company.company_name]
                    sheet_number = 'A{}'.format(sheet_counter)
                    # ws[sheet_number].hyperlink = "http://google.com"
                    # ws[sheet_number].value = company.company_name
                    # ws.cell(row=1, column=sheet_counter).value = '=HYPERLINK("{}", "{}")'.format('google.com', company.company_name)
                    link = ''
                    # pprint(company.company_name)
                    link = data_links.get(company.company_name.lower())
                    c_id = alchemy_session.query(Company.id).filter(Company.name == company.company_name)

                    try:
                        company_id = c_id[0][0]
                        webinterface_link = "http://192.168.0.141:8000/squirrel/accounts/{}/".format(company_id)
                    except IndexError:
                        company_id = ''
                    webinterface_link = "http://192.168.0.141:8000/squirrel/accounts/search/{}/".format(company.company_name)
                    # pprint(link)

                    query_link = alchemy_session.query(Company).filter(Company.name == company.company_name)
                    query_link.update({Company.d_crm_link: link}, synchronize_session="fetch")
                    alchemy_session.commit()
                    row = ['=HYPERLINK("{}", "{}")'.format(webinterface_link, company.company_name),
                           company_table_info.get(company.company_name), website, session_length, total_session_lenght,
                           rating_data.get(company.company_name), address, request.title,
                           request.url, access_dt, country]
                    sheet_counter += 1
                    # pprint(type(row))
                    if wiki_info:
                        row.extend([
                            wiki_info.manual_entry,
                            wiki_info.wiki_url_w,
                            convert_to_float(wiki_info.revenue_wikipedia_w),
                            wiki_info.revenue_currency_wiki_w,
                            convert_to_int(wiki_info.employees_wikipedia_w),
                            wiki_info.categories_wikipedia_w,
                            wiki_info.branch_wikipedia_w,
                            wiki_info.summary_wikipedia_w,
                        ])
                    else:
                        row.extend([''] * 8)

                    if xing_info:
                        if company_table_manual_id.manual_account_id:
                            c_t_manual_id = company_table_manual_id.manual_account_id
                        else:
                            c_t_manual_id = u'NONE'
                        row.extend([
                            xing_info.manual_entry,
                            xing_page,
                            xing_info.country_xing,
                            xing_info.employees_group_xing_x,
                            xing_info.employees_size_xing,
                            xing_info.description_xing,
                            xing_info.industry_xing,
                            c_t_manual_id
                            # company_manual_account.get(company_name)
                        ])
                    else:
                        row.extend([''] * 8)

                    if full_website in account_data or prepared_company_name in account_data:
                        data_to_extend = []
                        for key in account_headers:
                            if full_website in account_data:
                                value = account_data[full_website].get(key, '')
                            else:
                                value = account_data[prepared_company_name].get(key, '')
                            data_to_extend.append(value)
                        row.extend(data_to_extend)
                    elif account_headers:
                        row.extend([''] * len(account_headers))

                    if company_name in total_fields:
                        obj = total_fields.get(company_name, {})
                        row.extend([
                            datetime.timedelta(seconds=obj.get('time') or 0),
                            convert_to_int(obj.get('visited')),
                            obj.get('last_visited'),
                        ])
                    else:
                        row.extend([''] * len(settings.TOTAL_HEADERS))

                    rcd_name = companies_info.get(company_name)
                    if rcd_name and rcd_name.name:
                        if wiki_info:
                            row.extend([
                                wiki_info.manual_entry
                            ])
                        else:
                            row.extend([""])

                        if xing_info:
                            row.extend([
                                xing_info.manual_entry
                            ])
                        else:
                            row.extend([""])
                        query = alchemy_session.query(Company).filter(Company.name == rcd_name.name)
                        dict_for_save = dict(mx_crm_location_level=variables_data.get(rcd_name.name).get('location'),
                                             mx_crm_branch_level=variables_data.get(rcd_name.name).get('branch'),
                                             mx_crm_google_evaluation=variables_data.get(rcd_name.name).get(
                                                 'google_ev'),
                                             mx_crm_wiki_rating_points=variables_data.get(rcd_name.name).get(
                                                 'wiki_size'),
                                             mx_crm_xing_rating_points=variables_data.get(rcd_name.name).get(
                                                 'xing_size'),
                                             mx_crm_revenue_level=variables_data.get(rcd_name.name).get(
                                                 'revenue_point'))
                        rating_update_info = dict(
                            mx_crm_location_level=variables_data.get(rcd_name.name).get('location'),
                            mx_crm_branch_level=variables_data.get(rcd_name.name).get('branch'),
                            mx_crm_google_evaluation=float(variables_data.get(rcd_name.name).get('google_ev')),
                            mx_crm_wiki_rating_points=variables_data.get(rcd_name.name).get('wiki_size'),
                            mx_crm_xing_rating_points=variables_data.get(rcd_name.name).get('xing_size'),
                            mx_crm_revenue_level=variables_data.get(rcd_name.name).get('revenue_point'))
                        query.update(rating_update_info, synchronize_session=False)
                        relation_ship_type = row[36]
                        account_name = row[27]
                        account_owner = row[28]
                        abc_rating = row[38]

                        closed_activity_type = row[31]
                        if row[32] != '':
                            closed_date = row[32]
                        else:
                            closed_date = None
                        # closed_date = datetime.datetime.strptime(str(row[32]), '%m/%d/%Y %H:%M:%S')
                        open_activity_type = row[33]
                        if row[34] != '':
                            schedule_date = row[34]
                        else:
                            schedule_date = None
                        # schedule_date = datetime.datetime.strptime(str(row[34]), '%m/%d/%Y %H:%M:%S')
                        total_session_length = row[39]
                        total_visited_page = row[40]
                        last_visit_time = row[41]

                        alchemy_session.commit()
                        dynamics_crm_info = dict(d_crm_relationship_type=relation_ship_type,
                                                 d_crm_account_name=account_name,
                                                 d_crm_account_owner=account_owner,
                                                 d_crm_abc_rating=abc_rating,
                                                 d_crm_closed_activity_type=closed_activity_type,
                                                 d_crm_open_activity_type=open_activity_type,
                                                 d_crm_closed_date=closed_date,
                                                 d_crm_schedule_date=schedule_date,
                                                 mx_crm_total_session_length=total_session_length,
                                                 mx_crm_total_visited_pages=total_visited_page,
                                                 mx_crm_last_visit=last_visit_time,
                                                 squirrel_rating=rating_data.get(rcd_name.name))
                                                 #webinterface_link=webinterface_link) # also in this query save webinterface link

                        query_dynamics_crm = alchemy_session.query(Company).filter(Company.name == rcd_name.name)
                        query_dynamics_crm.update(dynamics_crm_info, synchronize_session=False)
                        alchemy_session.commit()
                        row.extend([
                            rating_data.get(rcd_name.name, 'N/C') if rating_data.get(
                                rcd_name.name) is not None else 'N/C',
                        ])
                        row.extend([
                            variables_data.get(rcd_name.name).get('location')
                        ])
                        row.extend([
                            variables_data.get(rcd_name.name).get('branch')
                        ])
                        row.extend([
                            variables_data.get(rcd_name.name).get('google_ev')
                        ])
                        row.extend([
                            variables_data.get(rcd_name.name).get('wiki_size')
                        ])
                        row.extend([
                            variables_data.get(rcd_name.name).get('xing_size')
                        ])
                        row.extend([
                            variables_data.get(rcd_name.name).get('revenue_point')
                        ])
                    else:
                        row.extend(['N/C'] * len(settings.RATING_HEADERS))

                    try:
                        ws.append(row)
                    except ValueError as e:
                        logger.info(e)
                    counter += 1
                    if not ws.row_dimensions[counter - 1].collapsed:
                        ws.row_dimensions[counter].hidden = True
                        ws.row_dimensions[counter].outlineLevel = 1

        wb.save(path_to_xl)
        d_start = dates.get('start_date')
        e_date = dates.get('end_date')
        start_date = datetime.datetime(d_start.year, d_start.month, d_start.day)
        end_date = datetime.datetime(e_date.year, e_date.month, e_date.day)
        # g_a_c = get_google_analytics_sessions(start_date, end_date, True)
        # logger.info(g_a_c)
        # logger.info(google_analytics_companies)
        # result = add_google_analytics_accounts_to_report_file(path_to_xl, start_date, end_date)
        # os.chdir("C:/Users/admin/PycharmProjects/SquirrelRunnerNew/mx_crm")
        # cd = os.system('python add_companies.py --days_start={0} --year_start={1} --month_start={2} --days_end={3} --year_end={4} --month_end={5}'.format(
        #     d_start.day, d_start.year, d_start.month, e_date.day, e_date.year, e_date.month
        # ))
        # logger.info(cd)

    except KeyError as e:
        logger.error(e)
    logger.info('Local file has been updated')
Code example #30
 def log_end(self, status, error):
     self.current_session.error = error
     self.current_session.status = status
     self.current_session.end_datetime = datetime.now()
     session.commit()