""" Module run every day by a cron job to update database and cache. """ ## setup the Django with its private settings for server. if __name__ == "__main__": from main.tools import set_up set_up.set_up_django_environment('main.settings_for_schedule') from contracts.crawler import DynamicDataCrawler, StaticDataCrawler from contracts import models from contracts.analysis import analysis_manager def update(): # check if we need to download static data if not models.ProcedureType.objects.exists(): c = StaticDataCrawler() c.retrieve_and_save_all() # check if we need to create categories if not models.Category.objects.exists(): from contracts.tools.cpvs import build_categories build_categories() # retrieve latest dynamic data. crawler = DynamicDataCrawler() affected_entities = crawler.update_all() # update entities cached data for entity in models.Entity.objects.all():
""" This module has methods that cross validate the data we have against the data the official database has. """ import requests import json if __name__ == '__main__': # these first two lines are used to setup a minimal Django environment from main.tools import set_up set_up.set_up_django_environment('main.tools.settings_for_script') from contracts.models import Entity, Contract def get_entity_contracts_difference(base_id): """ Returns a set with the difference between the base_id's of contracts from BASE with those we have. """ def _are_entity_contracts_synchronized(entity): """ Checks if the number of contracts in BASE of an entity matches our number """ def get_entity_contracts_count(): """ Retrieves the number of contracts from a given entity from BASE """ url = 'http://www.base.gov.pt/base2/rest/contratos?adjudicatariaid=%d' \ '&sort(-id)' % entity.base_id
# Bootstrap a minimal Django environment. Kept ahead of the model imports
# below, which presumably need Django to be configured first.
from main.tools import set_up
set_up.set_up_django_environment('main.settings_for_script')

# Django is set up from here on, so the models can be imported and queried.
from contracts import models
from django.db.models import Sum

# Queryset over every contract in the database.
all_contracts = models.Contract.objects.all()

# Count the rows matched by that queryset and report the figure.
number_of_contracts = all_contracts.count()
print(number_of_contracts)

# Add up the `price` of every contract. The aggregate comes back as a dict
# keyed by the alias passed to `aggregate` ('sum').
price_aggregate = all_contracts.aggregate(sum=Sum('price'))
total_price = price_aggregate['sum']
print(total_price)
from main.tools import set_up
set_up.set_up_django_environment('main.settings')

import logging

from django.db import transaction

from law.crawler_forms import DocumentForm
from law.models import Document

logger = logging.getLogger(__name__)


def build_data(document, publication):
    """
    Maps the variables of pt_law_downloader to our form.

    `publication` and `document` are indexed by key (``obj[key]``); a missing
    key propagates whatever error that lookup raises.

    Returns a new dict whose keys are the form field names.
    """
    # (form field, key in `publication`)
    publication_fields = (
        ('creator_name', 'creator'),
        ('type', 'type'),
        ('number', 'number'),
        ('text', 'text'),
        ('summary', 'summary'),
        ('date', 'date'),
        ('dre_doc_id', 'dre_id'),
        ('dre_pdf_id', 'pdf_id'),
        ('dr_pages', 'pages'),
    )
    # (form field, key in `document`)
    document_fields = (
        ('dr_series', 'series'),
        ('dr_supplement', 'supplement'),
        ('dr_number', 'number'),
    )

    # Publication values are read first, then document values, in the order
    # the tables list them.
    data = {field: publication[key] for field, key in publication_fields}
    data.update((field, document[key]) for field, key in document_fields)
    return data
# Minimal Django bootstrap using the settings module named below. It is
# kept before the model imports, which presumably rely on it.
from main.tools import set_up
set_up.set_up_django_environment('contracts.tools.public_settings')

# With Django set up, the models are available for querying the database.
from contracts import models
from django.db.models import Sum

# Every contract stored in the database, as a queryset.
all_contracts = models.Contract.objects.all()

# How many contracts that queryset holds.
number_of_contracts = all_contracts.count()
print(number_of_contracts)

# Total of the `price` field over all contracts; 'sum' is the alias under
# which the aggregate is returned.
total_price = all_contracts.aggregate(
    sum=Sum('price'),
)['sum']
print(total_price)
from main.tools import set_up
set_up.set_up_django_environment('main.settings')

import logging

from django.db import transaction

from law.crawler_forms import DocumentForm
from law.models import Document

logger = logging.getLogger(__name__)


def build_data(document, publication):
    """
    Maps the variables of pt_law_downloader to our form.

    Values are looked up by key in `publication` first and then in
    `document`; the result is a new dict keyed by form field name.
    """
    # Fields taken from the publication.
    form_data = {
        'creator_name': publication['creator'],
        'type': publication['type'],
        'number': publication['number'],
        'text': publication['text'],
        'summary': publication['summary'],
        'date': publication['date'],
        'dre_doc_id': publication['dre_id'],
        'dre_pdf_id': publication['pdf_id'],
        'dr_pages': publication['pages'],
    }

    # Fields taken from the document the publication belongs to.
    document_part = {
        'dr_series': document['series'],
        'dr_supplement': document['supplement'],
        'dr_number': document['number'],
    }

    form_data.update(document_part)
    return form_data