def main():
    """Classify each persona of a busqueda with Weka NaiveBayes and flag the returned snippets.

    Command-line arguments:
        sys.argv[1]: output path handed to ``DiasporaOutput``; also the
            directory later scanned for ``*.out`` prediction files.
        sys.argv[2]: primary key of the ``Busqueda`` to process.

    For every persona a feature-matrix file is written, the classifier is run
    on it and its standard output is stored in ``<feature file>.out``. Every
    ``.out`` file found under the output path is then parsed with
    ``get_weka_top5`` and each snippet it returns gets
    ``converging_pipelines = 1``.

    Exits with status -1 when an argument is missing or invalid, or when the
    classifier process cannot be started.
    """
    import subprocess

    try:
        base = DiasporaOutput(sys.argv[1])
    except Exception:
        logging.error("Missing output path")
        sys.exit(-1)
    try:
        busqueda_id = int(sys.argv[2])
    except (IndexError, ValueError):
        logging.error("Missing busqueda_id")
        sys.exit(-1)
    busqueda = Busqueda.objects.get(pk=busqueda_id)

    # TODO: detect 32-bit vs 64-bit data model
    # j48_path = UNOPORUNO_ROOT + '/resources/classifiers/j48/J48.weka.32.data.model'
    nbtree_path = UNOPORUNO_ROOT + "/resources/classifiers/naivebayes/NaiveBayes.data.model"

    for persona in busqueda.persona_set.all():
        person_file = base.write_personal_feature_matrix(persona)
        logging.info("Exporting features matrix for person " + persona.name)
        # Argument list instead of a shell string: paths containing spaces or
        # shell metacharacters are passed to java untouched.
        argv = [
            "java",
            "weka.classifiers.bayes.NaiveBayes",
            "-l",
            nbtree_path,
            "-T",
            person_file,
            "-p",
            "1",
        ]
        out_file = person_file + ".out"
        logging.info("classyfying with command=" + " ".join(argv) + " > " + out_file)
        try:
            with open(out_file, "w") as out:
                status = subprocess.call(argv, stdout=out)
        except (OSError, IOError):
            # java is not runnable or the output file cannot be created.
            logging.exception("Could not run the classifier on " + person_file)
            sys.exit(-1)
        if status != 0:
            # Keep going, as before, but make the failure visible.
            logging.error("Classifier exited with status " + str(status) + " for " + person_file)

    for dirpath, _dirnames, filenames in os.walk(sys.argv[1] + "/"):
        for filename in filenames:
            if not filename.endswith(".out"):
                continue
            # Join with the directory being walked so files found in
            # sub-directories resolve to their real location.
            top5 = get_weka_top5(os.path.join(dirpath, filename))
            logging.info("Extracting " + str(top5) + " tuples from file:" + filename)
            for s in top5:
                # First element of each tuple is used as the snippet primary key.
                snippet = Snippet.objects.get(pk=int(s[0]))
                snippet.converging_pipelines = 1
                snippet.save()
Ejemplo n.º 2
0
def main():
    """Classify each persona of a busqueda with the Weka SMO model and flag the returned snippets.

    Command-line arguments:
        sys.argv[1]: output path handed to ``DiasporaOutput``; also the
            directory later scanned for ``*.out`` prediction files.
        sys.argv[2]: primary key of the ``Busqueda`` to process.

    For every persona a feature-matrix file is written, the classifier is run
    on it and its standard output is stored in ``<feature file>.out``. Every
    ``.out`` file found under the output path is then parsed with
    ``get_weka_top5`` and each snippet it returns gets
    ``converging_pipelines = 1``.

    Exits with status -1 when an argument is missing or invalid, or when the
    classifier process cannot be started.
    """
    import subprocess

    try:
        base = DiasporaOutput(sys.argv[1])
    except Exception:
        logging.error('Missing output path')
        sys.exit(-1)
    try:
        busqueda_id = int(sys.argv[2])
    except (IndexError, ValueError):
        logging.error('Missing busqueda_id')
        sys.exit(-1)
    busqueda = Busqueda.objects.get(pk=busqueda_id)

    # TODO: detect 32-bit vs 64-bit data model
    # j48_path = UNOPORUNO_ROOT + '/resources/classifiers/j48/J48.weka.32.data.model'
    smo_path = UNOPORUNO_ROOT + '/resources/classifiers/smo/SMO.data.model'

    for persona in busqueda.persona_set.all():
        person_file = base.write_personal_feature_matrix(persona)
        logging.info('Exporting features matrix for person ' + persona.name)
        # Argument list instead of a shell string: paths containing spaces or
        # shell metacharacters are passed to java untouched.
        argv = ['java', 'weka.classifiers.functions.SMO',
                '-l', smo_path, '-T', person_file, '-p', '1']
        out_file = person_file + '.out'
        logging.info('classyfying with command=' + ' '.join(argv) + ' > ' + out_file)
        try:
            with open(out_file, 'w') as out:
                status = subprocess.call(argv, stdout=out)
        except (OSError, IOError):
            # java is not runnable or the output file cannot be created.
            logging.exception('Could not run the classifier on ' + person_file)
            sys.exit(-1)
        if status != 0:
            # Keep going, as before, but make the failure visible.
            logging.error('Classifier exited with status ' + str(status) +
                          ' for ' + person_file)

    for dirpath, _dirnames, filenames in os.walk(sys.argv[1] + '/'):
        for filename in filenames:
            if not filename.endswith('.out'):
                continue
            # Join with the directory being walked so files found in
            # sub-directories resolve to their real location.
            top5 = get_weka_top5(os.path.join(dirpath, filename))
            logging.info('Extracting ' + str(top5) + ' tuples from file:' +
                         filename)
            for s in top5:
                # First element of each tuple is used as the snippet primary key.
                snippet = Snippet.objects.get(pk=int(s[0]))
                snippet.converging_pipelines = 1
                snippet.save()
Ejemplo n.º 3
0
def main():
    """Run the Weka SMO classifier for one busqueda and mark the snippets it selects.

    Reads two command-line arguments: ``sys.argv[1]`` is the output path
    given to ``DiasporaOutput`` (and walked afterwards for ``*.out`` files),
    ``sys.argv[2]`` is the ``Busqueda`` primary key.

    Steps:
        1. Write one feature-matrix file per persona of the busqueda.
        2. Run ``java weka.classifiers.functions.SMO`` on each file, storing
           the process output next to it with an ``.out`` suffix.
        3. Parse every ``.out`` file with ``get_weka_top5`` and set
           ``converging_pipelines = 1`` on each snippet it returns.

    The process exits with status -1 if an argument is missing/invalid or the
    classifier cannot be launched.
    """
    import subprocess

    try:
        base = DiasporaOutput(sys.argv[1])
    except Exception:
        logging.error('Missing output path')
        sys.exit(-1)
    try:
        busqueda_id = int(sys.argv[2])
    except (IndexError, ValueError):
        logging.error('Missing busqueda_id')
        sys.exit(-1)
    busqueda = Busqueda.objects.get(pk=busqueda_id)

    # TODO: detect 32-bit vs 64-bit data model
    # j48_path = UNOPORUNO_ROOT + '/resources/classifiers/j48/J48.weka.32.data.model'
    smo_path = UNOPORUNO_ROOT + '/resources/classifiers/smo/SMO.data.model'
    java_prefix = ['java', 'weka.classifiers.functions.SMO', '-l', smo_path]

    for persona in busqueda.persona_set.all():
        person_file = base.write_personal_feature_matrix(persona)
        logging.info('Exporting features matrix for person ' + persona.name)
        # The command is passed as an argument list (no shell), so file names
        # with spaces or shell metacharacters cannot alter it.
        argv = java_prefix + ['-T', person_file, '-p', '1']
        predictions_file = person_file + '.out'
        logging.info('classyfying with command=' + ' '.join(argv) + ' > ' + predictions_file)
        try:
            with open(predictions_file, 'w') as predictions:
                exit_status = subprocess.call(argv, stdout=predictions)
        except (OSError, IOError):
            # Raised when java cannot be started or the .out file cannot be written.
            logging.exception('Could not run the classifier on ' + person_file)
            sys.exit(-1)
        if exit_status != 0:
            # A failing run is reported but does not stop the remaining personas.
            logging.error('Classifier exited with status ' + str(exit_status) + ' for ' + person_file)

    for dirpath, _dirnames, filenames in os.walk(sys.argv[1] + '/'):
        for filename in filenames:
            if not filename.endswith('.out'):
                continue
            # os.walk descends into sub-directories, so the path must be built
            # from the directory currently being visited.
            top5 = get_weka_top5(os.path.join(dirpath, filename))
            logging.info('Extracting ' + str(top5) + ' tuples from file:' + filename)
            for s in top5:
                # s[0] is used as the snippet primary key.
                snippet = Snippet.objects.get(pk=int(s[0]))
                snippet.converging_pipelines = 1
                snippet.save()
Ejemplo n.º 4
0
    except:
        logging.error('No busqueda object with id=' +busqueda_in+ ' in UNOPORUNO database.')
        exit(-1)
        
    logging.info('Processing busqueda ' +busqueda.nombre )
# Second command-line argument: path handed to DiasporaOutput for the export.
try:
    results_path = sys.argv[2]
except IndexError:
    logging.error('Missing parameter path')
    logging.error('Usage: python unoporuno_export.py NAME|NUMBER path')
    sys.exit(-1)

# Optional third argument: a file with one person id per line. When it is
# absent or unreadable the whole busqueda is exported instead.
try:
    # The context manager closes the file even if reading fails.
    with open(sys.argv[3]) as person_id_file:
        person_id_list = [line.strip() for line in person_id_file]
except (IndexError, IOError):
    person_id_list = None


diaspora_output = DiasporaOutput(results_path)
if person_id_list:
    logging.debug('launching export_persona with id_list=' + str(person_id_list))
    diaspora_output.export_unoporuno_persona(busqueda, person_id_list)
else:
    # Also reached when the id file exists but is empty.
    diaspora_output.export_unoporuno_busqueda(busqueda)
    


Ejemplo n.º 5
0
    try:
        busqueda = Busqueda.objects.get(nombre=busqueda_in)
    except:
        logging.error('No busqueda object with id=' + busqueda_in +
                      ' in UNOPORUNO database.')
        exit(-1)

    logging.info('Processing busqueda ' + busqueda.nombre)
# The export destination is the second command-line argument.
try:
    results_path = sys.argv[2]
except IndexError:
    logging.error('Missing parameter path')
    logging.error('Usage: python unoporuno_export.py NAME|NUMBER path')
    sys.exit(-1)

# A third argument, when given, names a file listing one person id per line.
# If it is missing or cannot be opened, fall back to exporting everything.
person_id_list = None
try:
    person_id_path = sys.argv[3]
except IndexError:
    person_id_path = None
if person_id_path is not None:
    try:
        # "with" guarantees the file handle is released after reading.
        with open(person_id_path) as person_id_file:
            person_id_list = [raw_line.strip() for raw_line in person_id_file]
    except IOError:
        person_id_list = None

diaspora_output = DiasporaOutput(results_path)
if person_id_list:
    logging.debug('launching export_persona with id_list=' +
                  str(person_id_list))
    diaspora_output.export_unoporuno_persona(busqueda, person_id_list)
else:
    # An empty id file takes this branch as well.
    diaspora_output.export_unoporuno_busqueda(busqueda)
Ejemplo n.º 6
0
# Country codes treated as Latin American when ranking country frequencies.
# NOTE(review): 'C' is not a two-letter code (possibly meant 'CU') and 'PY'
# is listed twice -- values kept exactly as found; confirm before changing.
_LATIN_AMERICA = ['AR', 'BZ', 'BO', 'CL', 'CO', 'CR', 'C', 'DO', 'SV', 'MX', 'GT',
                  'HT', 'JM', 'NI', 'PY', 'PE', 'VE', 'TT', 'PY', 'HN', 'PA', 'UY']


def _run_classifier(classifier, data_model_file, feature_file):
    """Run ``java <classifier>`` on feature_file, storing its stdout in ``<feature_file>.out``."""
    import subprocess

    # Argument list (no shell): paths with spaces or metacharacters are safe.
    argv = ['java', classifier, '-l', data_model_file, '-T', feature_file, '-p', '1']
    out_file = feature_file + '.out'
    try:
        with open(out_file, 'w') as out:
            status = subprocess.call(argv, stdout=out)
    except (OSError, IOError):
        # java cannot be started or the output file cannot be created.
        logging.exception('Could not run the classifier on ' + feature_file)
        raise SystemExit(-1)
    logging.info('classyfying with command=' + ' '.join(argv) + ' > ' + out_file)
    if status != 0:
        # Reported but not fatal, so the remaining personas are still processed.
        logging.error('Classifier exited with status ' + str(status) + ' for ' + feature_file)


def _most_frequent_countries(country_counts):
    """Return ``(LA_best, world_best)``, each a ``(country, count)`` tuple or ``('', 0)``."""
    la_best = ('', 0)
    world_best = ('', 0)
    for country, count in country_counts.items():
        if country in _LATIN_AMERICA:
            if count > la_best[1]:
                la_best = (country, count)
        elif count > world_best[1]:
            world_best = (country, count)
    return la_best, world_best


def _promote_by_country(snippets, counts, world_country, world_limit,
                        la_country, la_limit, world_msg, la_msg, limit=5):
    """Mark snippets featuring the top world / LA country until a quota or ``limit`` is hit.

    ``counts`` is ``[total, world, LA]`` and is updated in place. While the
    world quota is open only the world country is tested; the LA country is
    tested once the world quota is full.
    """
    for s in snippets:
        if counts[0] >= limit:
            break
        if counts[1] < world_limit:
            if world_country in str(s.featured_countries):
                s.converging_pipelines = 1
                s.save()
                counts[0] += 1
                counts[1] += 1
                logging.info(world_msg)
        elif counts[2] < la_limit:
            if la_country in str(s.featured_countries):
                s.converging_pipelines = 1
                s.save()
                counts[0] += 1
                counts[2] += 1
                logging.info(la_msg)


def _promote_unmarked(snippets, counts, limit=5):
    """Mark not-yet-selected snippets, in iteration order, until ``counts[0]`` reaches ``limit``."""
    if counts[0] >= limit:
        return
    for s in snippets:
        if counts[0] >= limit:
            break
        if s.converging_pipelines == 1:
            continue
        s.converging_pipelines = 1
        s.save()
        counts[0] += 1


def classify_person_top5(busqueda_id, path, classifier, data_model_file):
    """Classify the snippets of every persona in a busqueda and select up to five per persona.

    Args:
        busqueda_id: primary key of the ``Busqueda``; converted with ``int()``.
        path: base directory; output goes to ``path/classifier/``.
        classifier: Java class name passed to ``java``; also used as the
            name of the output sub-directory.
        data_model_file: model file passed to the classifier with ``-l``.

    Side effects:
        * Writes one feature file and one ``.out`` file per persona.
        * Every snippet returned by ``get_weka_top5`` gets
          ``converging_pipelines = 2`` and a recomputed ``RE_score``.
        * ``persona.prediction`` is set to 1 when both a Latin American and a
          non Latin American country were seen, 2 when only one kind was
          seen, 3 when none was.
        * Up to five snippets per persona get ``converging_pipelines = 1``.

    Raises:
        SystemExit: with status -1 when the output path or ``busqueda_id`` is
            invalid, or when the classifier cannot be launched.
    """
    # TODO: handle a) no snippet classified as positive and b) fewer than
    # five snippets classified as positive.

    output_path = path + '/' + classifier + '/'
    logging.info('classifying persons with busqueda_id=' + str(busqueda_id) + ', classifier'
                 + classifier + ' , data_model_file=' + data_model_file)
    try:
        base = DiasporaOutput(output_path)
    except Exception:
        logging.error('Error on output path' + output_path)
        raise SystemExit(-1)
    try:
        busqueda_id = int(busqueda_id)
    except (TypeError, ValueError):
        logging.error('Missing busqueda_id')
        raise SystemExit(-1)
    busqueda = Busqueda.objects.get(pk=busqueda_id)

    for persona in busqueda.persona_set.all():
        persona_file = base.write_personal_feature_matrix_2class(persona)
        _run_classifier(classifier, data_model_file, persona_file)

    # persona id -> {country: number of classified snippets featuring it}
    d_personas = dict()
    for dirpath, _dirnames, filenames in os.walk(output_path):
        for filename in filenames:
            if not filename.endswith('.out'):
                continue
            # Join with the walked directory so nested files resolve correctly.
            classed_snippets = get_weka_top5(os.path.join(dirpath, filename))
            logging.info('Extracting ' + str(len(classed_snippets)) + ' tuples from file:' + filename)
            logging.info('Classed snippets=' + str(classed_snippets))
            for tupla in classed_snippets:
                logging.info('looking for snippet id =' + str(tupla[0]))
                snippet = Snippet.objects.get(id=int(tupla[0]))
                d_paises = d_personas.setdefault(snippet.persona_id, dict())
                if snippet.featured_countries:
                    # Keys stay as text: the same value is later compared with
                    # the country list and searched inside str(featured_countries).
                    for pais in snippet.featured_countries.split(','):
                        d_paises[pais] = d_paises.get(pais, 0) + 1

                snippet.converging_pipelines = 2
                snippet.RE_score = get_feature_count(snippet.RE_features)
                snippet.save()

    for persona in busqueda.persona_set.all():
        d_paises = d_personas.setdefault(persona.id, dict())

        logging.info('Persona ' + persona.name + ' has the following country frequencies:' + str(d_paises)
                     + ' and prediction=' + str(persona.prediction))
        LA_freq, mundo_freq = _most_frequent_countries(d_paises)
        logging.info('Pais LA mas frequente:' + str(LA_freq))
        logging.info('Pais no LA mas frequente:' + str(mundo_freq))

        if mundo_freq[1] > 0 and LA_freq[1] > 0:
            persona.prediction = 1
            logging.info(persona.name + ' is movil! with prediction=' + str(persona.prediction))
        elif mundo_freq[1] > 0 or LA_freq[1] > 0:
            persona.prediction = 2
            logging.info('local!')
        else:
            persona.prediction = 3
            logging.info('no sé!')

        # Selection order:
        #   up to 3 from the most frequent non Latin American country,
        #   up to 2 from the most frequent Latin American country,
        #   the remainder from the other classified snippets.
        mobile_snippets = persona.snippet_set.filter(converging_pipelines=2).order_by('-RE_score')
        local_snippets = persona.snippet_set.filter(converging_pipelines=3).order_by('-RE_score')
        converging_count = [0, 0, 0]  # [selected total, world hits, LA hits]
        if mundo_freq[1] > 0:
            mobile_limit = min(3, mundo_freq[1])
            LA_limit = min(2, LA_freq[1])
            _promote_by_country(mobile_snippets, converging_count, mundo_freq[0], mobile_limit,
                                LA_freq[0], LA_limit, 'world hit!', 'LA hit!')
            if converging_count[0] < 5:
                _promote_by_country(local_snippets, converging_count, mundo_freq[0], mobile_limit,
                                    LA_freq[0], LA_limit, 'world local hit!', 'LA local hit!')

        # The same queryset objects are reused on purpose: the instances
        # marked above are skipped by the converging_pipelines == 1 check.
        _promote_unmarked(mobile_snippets, converging_count)
        _promote_unmarked(local_snippets, converging_count)
        if converging_count[0] < 5:
            # Last resort: any FG snippet that is not RE. The previous code
            # built this queryset but looped over local_snippets again, so
            # this fallback never selected anything.
            todos = persona.snippet_set.filter(FG=1).exclude(RE=1).order_by('-RE_features')
            _promote_unmarked(todos, converging_count)
        # A trailing block that compared and incremented the converging_count
        # list as if it were an int was removed: it could never execute
        # correctly (list vs int).

        # NOTE(review): this message says "is movil!" for every prediction value.
        logging.info(persona.name + ' is movil! with prediction=' + str(persona.prediction))
        persona.save()