Example #1
0
File: views.py Project: EPX/epx2013
def get_data(src, act_ids, url):
    """
    FUNCTION
    get data of an act from a source in parameter
    PARAMETERS
    src: source (eurlex or oeil) [string]
    act_ids: dictionary of act ids for each source [dictionary of ActIds instances]
    url: link to the act page [string]; for eurlex, url[0] and url[1] are
         both fetched, so a sequence of two links is expected
    RETURN
    fields:  dictionary which contains retrieved data for a given source [dictionary]
    dg_names: list of dg names [list of strings]
    resp_names: list of resp names [list of strings]
    NOTES
    - when the content cannot be retrieved (or src is neither eurlex nor oeil),
      the empty defaults are returned and url_exists is set to False
    - act_ids[src] is saved in every case
    """
    logger.debug("get_data")
    fields = {}
    dg_names = [None] * nb_dgs
    resp_names = [None] * nb_resps
    # extraction function of the source; stays None when no valid content was retrieved
    extractor = None

    logger.debug("get_url_content_" + src)

    if src == "eurlex":
        url_content = [get_url_content_eurlex(url[0]), get_url_content_eurlex(url[1])]
        # only the first page is checked; the second one is passed on as retrieved
        if url_content[0] is not False:
            extractor = get_data_eurlex
    elif src == "oeil":
        url_content = get_url_content_oeil(url)
        if url_content is not False:
            extractor = get_data_oeil

    # if the url exists and there is a valid content
    if extractor is not None:
        act_ids[src].url_exists = True
        # explicit function reference instead of eval("get_data_" + src):
        # only the two known sources can reach this point
        fields, dg_names, resp_names = extractor(url_content, act_ids["index"])
    else:
        act_ids[src].url_exists = False
        logger.debug("error while retrieving " + src + " url")
        print "error while retrieving " + src + " url"

    # update url exist attribute
    logger.debug("act_ids to be saved")
    act_ids[src].save()

    return fields, dg_names, resp_names
Example #2
0
 def handle(self, **options):
     
     #get type_act for acts of 2014 NOT YET VALIDATED AND VALIDATE THEM for the statistical analysis
     for act in Act.objects.filter(type_acte__isnull=True, validated=2, releve_annee=2014):
         print act
         #url content
         no_celex=ActIds.objects.get(src="index", act=act).no_celex
         url=get_url_eurlex(no_celex)
         soup=get_url_content_eurlex(url)
         #type acte
         act.type_acte=get_type_acte(soup)
         act.save()
Example #3
0
    def handle(self, **options):
        
        for act_ids in ActIds.objects.filter(src="index", act__validated=2, act__releve_annee=2013):
            act=act_ids.act
            print act
            print "act.adopt_propos_origine", act.adopt_propos_origine
            
            url=get_url_eurlex(act_ids.no_celex, tab="HIS")
            soup_his=get_url_content_eurlex(url)
            soup_his=soup_his.find("div", {"class": "tabContent"})
            #remove script tags
            [s.extract() for s in soup_his('script')]
            
            adopt_propos_origine=get_adopt_propos_origine(soup_his, act_ids.propos_origine)
            print "adopt_propos_origine", adopt_propos_origine

            if str(adopt_propos_origine)!=str(act.adopt_propos_origine):
                print "DIFFERENT"
                break
Example #4
0
    def handle(self, **options):
        
        for act_ids in ActIds.objects.filter(src="index", act__validated=2, act__date_cons_a__isnull=True, act__date_cons_b__isnull=True, act__releve_annee__in=[1996,2013,2014]):
            act=act_ids.act
            print act
            
            url=get_url_eurlex(act_ids.no_celex, tab="HIS")
            soup_his=get_url_content_eurlex(url)
            soup_his=soup_his.find("div", {"class": "tabContent"})
            #remove script tags
            [s.extract() for s in soup_his('script')]
            
            point_b_tables=get_point_b_tables(soup_his, act_ids.propos_origine)
            act.date_cons_b=get_date_cons_b(point_b_tables)
            print "date_cons_b", act.date_cons_b
            point_a_tables=get_point_a_tables(soup_his, act_ids.propos_origine)
            act.date_cons_a=get_date_cons_a(point_a_tables)
            print "date_cons_a", act.date_cons_a

            act.save()