def documents(request, corpus_name=None):
    """
    Validate the 'corpus' request parameter and load the matching Corpus.

    The corpus is looked up by id, using the value of the 'corpus' request
    parameter. The error response built by throw_error is returned when the
    parameter is missing or when the lookup raises; otherwise the visible
    code falls through without an explicit return value.
    """
    response = _json(request)

    # create documents
    if 'corpus' not in request.REQUEST:
        return throw_error(response, error="corpus param is empty", code=API_EXCEPTION_INCOMPLETE)

    try:
        corpus = Corpus.objects.get(id=request.REQUEST.get('corpus'))
    except Exception as exc:
        message = "Exception: %s " % exc
        return throw_error(response, error=message, code=API_EXCEPTION_DOESNOTEXIST)
def segments_import(request, corpus_id):
    """
    Check the preconditions of a segments import for the given corpus.

    The working directory is hard-coded to "/tmp/". An error response is
    returned when that directory is missing or when the corpus cannot be
    loaded by primary key; otherwise the visible code falls through without
    an explicit return value.
    """
    response = _json(request, enable_method=False)
    path = "/tmp/"

    # debug aid: expose the working path in the response
    response['path'] = path

    path_missing = not os.path.exists(path)
    if path_missing:
        return throw_error(
            response,
            code=API_EXCEPTION_DOESNOTEXIST,
            error="path %s does not exits!" % path
        )

    try:
        c = Corpus.objects.get(pk=corpus_id)
    except Exception as exc:
        return throw_error(response, error="Exception: %s" % exc, code=API_EXCEPTION_DOESNOTEXIST)
def create_corpus(request, response):
    """
    Create a corpus, its media folder and the ownership link for the
    requesting user.

    request  -- the current request; its parameters feed ApiCorpusForm.
    response -- the response dict being built; receives 'owner',
                'corpus_path' and, on success, 'created'.

    Returns the rendered JSON response on success. Returns the throw_error
    response when the form is invalid (form errors) or when creating the
    folder / saving the models raises (code "fault").
    """
    response['owner'] = request.user.json()
    form = ApiCorpusForm(request.REQUEST, initial={'owner': request.user.id})

    # An invalid form used to fall through and make the view return None;
    # report the validation errors the same way create_relation does.
    if not form.is_valid():
        return throw_error(response, error=form.errors, code=API_EXCEPTION_FORMERRORS)

    # basename() keeps the folder directly under MEDIA_ROOT
    corpus_path = settings.MEDIA_ROOT + os.path.basename(form.cleaned_data['name'])
    response['corpus_path'] = corpus_path

    try:
        # create folder if it does not exist
        if not os.path.exists(corpus_path):
            os.makedirs(corpus_path)

        # create corpus and bind it to its owner
        c = Corpus(name=form.cleaned_data['name'])
        c.save()
        o = Owners(corpus=c, user=request.user)
        o.save()
    except Exception as e:
        return throw_error(response, error="Exception: %s" % e, code="fault")

    response['created'] = c.json()
    return render_to_json(response)
def _save_or_die(model_name, response, app_name="anta", filters=None):
    """
    Instantiate the model `app_name.model_name` with `filters` as keyword
    arguments and save it.

    model_name -- name of the model class to resolve through get_model.
    response   -- response dict handed to throw_error on failure.
    app_name   -- application label of the model (default "anta").
    filters    -- dict of constructor keyword arguments; None means no
                  arguments.

    Returns whatever the instance's save() returns, or the throw_error
    response if instantiation or saving raises.
    NOTE(review): Django's Model.save() presumably returns None, so callers
    do not get the instance back -- confirm this is intended.
    """
    from django.db.models.loading import get_model

    # avoid sharing one mutable default dict across calls
    if filters is None:
        filters = {}

    m = get_model(app_name, model_name)
    try:
        return m(**filters).save()
    except Exception as e:
        return throw_error(response, error="Exception: %s" % e, code=API_EXCEPTION_EMPTY)
def segment_stems(request, corpus_id=None):
    """
    Prepare the aggregated "stems" query over segments.

    Builds the base SELECT (one row per stemmed form, with tf / tfidf
    aggregates, document distribution and alias count) together with the
    WHERE fragments and their bound values. When corpus_id is given the
    corpus must exist, otherwise an error response is returned, and the
    query is restricted to that corpus. The visible code ends after the
    filters are prepared, without an explicit return value.
    """
    response = _json(request)

    # split order_by stuff
    # order_by = ["tfidf DESC","tfidf ASC","distribution ASC", "distribution DESC"]
    basic_query = (
        " SELECT s.stemmed as content,"
        " GROUP_CONCAT( s.content ) as sample,"
        " AVG( ds.tfidf ) as avg_tfidf,"
        " MAX( ds.tfidf ) as max_tfidf,"
        " MIN( ds.tfidf ) as min_tfidf,"
        " AVG( ds.tf ) as avg_tf,"
        " MAX( ds.tf ) as max_tf,"
        " MIN( ds.tf ) as min_tf,"
        " COUNT( distinct ds.document_id ) as distribution,"
        " COUNT( distinct s.id ) as aliases"
        " FROM anta_segment s"
        " JOIN anta_document_segment ds ON s.id = ds.segment_id"
        " JOIN anta_document d ON d.id = ds.document_id "
    )

    where, binds = [], []

    if corpus_id is not None:
        # make sure the corpus exists before filtering on it
        try:
            c = Corpus.objects.get(pk=corpus_id)
        except Exception as exc:
            return throw_error(response, error="Exception: %s" % exc, code=API_EXCEPTION_DOESNOTEXIST)

        where.append("d.corpus_id = %s")
        binds.append(corpus_id)
def relation(request, id):
    """
    Load a single Relation by id.

    Returns the throw_error response when the lookup raises; otherwise the
    visible code falls through without an explicit return value.
    """
    response = _json(request)

    # all documents
    try:
        r = Relation.objects.get(id=id)
    except Exception as exc:
        message = "Exception: %s" % exc
        return throw_error(response, message, code=API_EXCEPTION_DOESNOTEXIST)
def segments_export(request, corpus_id):
    """
    Export the segments of a corpus as a CSV attachment.

    request   -- the current request (only used to build an error response).
    corpus_id -- id of the corpus whose segments are exported.

    Returns an HttpResponse carrying UTF-8 CSV with one row per segment
    (id, content, stemmed form, distribution, max tf, max tfidf), or the
    throw_error response when the corpus does not exist.
    """
    c = _get_corpus(corpus_id)
    if c is None:
        return throw_error(
            _json(request, enable_method=False),
            error="corpus id %s does not exist..." % corpus_id,
            code=API_EXCEPTION_DOESNOTEXIST
        )

    import unicodecsv

    # one row per segment of the corpus; segments whose content matches the
    # REGEXP below (a letter followed by a punctuation mark) are left out
    ss = Segment.objects.raw(
        " SELECT `anta_segment`.`id`, `anta_segment`.`content`,"
        " `anta_segment`.`language`, `anta_segment`.`stemmed`,"
        " `anta_segment`.`status`,"
        " MAX(`anta_document_segment`.`tfidf`) AS `max_tfidf`,"
        " MAX(`anta_document_segment`.`tf`) AS `max_tf`,"
        " COUNT(`anta_document_segment`.`document_id`) AS `distro`"
        " FROM `anta_segment`"
        " JOIN `anta_document_segment` ON (`anta_segment`.`id` = `anta_document_segment`.`segment_id`)"
        " JOIN `anta_document` ON (`anta_document_segment`.`document_id` = `anta_document`.`id`)"
        " WHERE `anta_document`.`corpus_id` = %s"
        " AND content NOT REGEXP '^[[:alpha:]][[:punct:]]$'"
        " GROUP BY `anta_segment`.`id` ",
        [corpus_id]
    )

    # content_type is the non-deprecated spelling of the old mimetype argument
    response = HttpResponse(content_type='text/csv; charset=utf-8')
    response['Content-Description'] = "File Transfer"
    response['Content-Disposition'] = "attachment; filename=%s.csv" % c.name

    # the response object is used as the file-like target of the writer
    writer = unicodecsv.writer(response, encoding='utf-8')

    # headers
    writer.writerow(['segment_id', 'content', 'concept', 'distribution', 'max_tf', 'max_tfidf'])

    for s in ss:
        writer.writerow([s.id, s.content, s.stemmed, s.distro, s.max_tf, s.max_tfidf])

    return response
def pending_analysis_corpus(request, corpus_id):
    """
    List the analyses of a corpus that have no end date yet.

    request   -- the current request; pagination comes from the 'offset'
                 and 'limit' entries of response['meta'].
    corpus_id -- id of the corpus whose analyses are listed.

    Stores the JSON form of the matching analyses, newest id first, under
    response['objects']. Returns the throw_error response if building the
    list raises; otherwise the visible code falls through without an
    explicit return value.
    """
    response = _json(request)

    try:
        pending = Analysis.objects.filter(corpus__id=corpus_id, end_date=None).order_by("-id")
        start = response['meta']['offset']
        stop = start + response['meta']['limit']
        response['objects'] = [a.json() for a in pending[start:stop]]
    except Exception as e:
        # was "except Eception", which raised NameError instead of reporting the error
        return throw_error(response, error="Exception thrown: %s" % e, code=API_EXCEPTION_DOESNOTEXIST)
def document(request, document_id):
    """
    Read, update or delete a single document.

    request     -- the current request; response['meta']['method'] selects
                   the action.
    document_id -- id handed to _get_document.

    DELETE: delegates to _delete_instance, passing the stored file and the
            path returned by textify as attachments.
    POST:   updates title, ref_date and language from UpdateDocumentForm;
            empty / None values leave the current value untouched.
    When 'with-text' is present in response['meta'], the content of the
    file returned by textify is added under response['text'].

    Returns the rendered JSON response with the document under 'results',
    or a throw_error response (unknown document, invalid form, no text
    version available).
    """
    response = _json(request)

    # create or update a document
    # @todo
    d = _get_document(document_id)
    if d is None:
        return throw_error(response, "document %s does not exist..." % document_id, code=API_EXCEPTION_DOESNOTEXIST)

    method = response['meta']['method']

    # delete a document
    if method == 'DELETE':
        return _delete_instance(request, response, instance=d, attachments=[
            os.path.join(settings.MEDIA_ROOT, d.corpus.name, os.path.basename(d.url.url)),
            textify(d, settings.MEDIA_ROOT)
        ])

    # if method is POST, update the document
    if method == 'POST':
        form = UpdateDocumentForm(request.REQUEST)
        if not form.is_valid():
            return throw_error(response, error=form.errors, code=API_EXCEPTION_FORMERRORS)

        # only overwrite the fields that were actually provided
        cleaned = form.cleaned_data
        if len(cleaned['title']) > 0:
            d.title = cleaned['title']
        if cleaned['ref_date'] is not None:
            d.ref_date = cleaned['ref_date']
        if len(cleaned['language']) > 0:
            d.language = cleaned['language']
        d.save()

    # load text only if it's required
    if 'with-text' in response['meta']:
        text = textify(d, settings.MEDIA_ROOT)
        if text is None:
            return throw_error(response, "unable to provide txt version of the document")

        # close the handle deterministically instead of leaking it
        with open(text, 'r') as f:
            response['text'] = f.read()

    response['results'] = [d.json()]
    return render_to_json(response)
def _start_process(popen_args, routine, response):
    """
    Launch a sub-process for a routine without waiting for it.

    The routine's JSON form is stored under response['routine'] before
    subprocess.Popen is called with popen_args (stdout and stderr are not
    redirected). If launching raises, the throw_error response is returned;
    on success nothing is returned explicitly.
    """
    import subprocess
    import sys

    response['routine'] = routine.json()

    try:
        subprocess.Popen(popen_args, stdout=None, stderr=None)
    except Exception as exc:
        message = "Exception: %s" % exc
        return throw_error(response, error=message, code=API_EXCEPTION)
def create_document(request, response, corpus):
    """
    Validate the upload folder and the optional presets of a new document.

    The corpus folder (MEDIA_ROOT + corpus name) must exist. When a
    'language' parameter is sent, language / ref_date / title presets are
    validated through UpdateDocumentForm. When a 'tags' parameter is sent,
    it is decoded as JSON and every tag is either created through TagForm
    or, when the form reports an "__all__" error, fetched by name and type;
    each tag dict then receives the id of its Tag.

    Returns a throw_error response as soon as one of these steps fails;
    otherwise the visible code falls through without an explicit return
    value.
    """
    path = settings.MEDIA_ROOT + corpus.name + "/"

    # debug aid: expose the target path in the response
    response['path'] = path

    if not os.path.exists(path):
        return throw_error(
            response,
            code=API_EXCEPTION_DOESNOTEXIST,
            error="path %s does not exits!" % path
        )

    # check preloaded vars
    if request.REQUEST.get('language', None) is not None:
        form = UpdateDocumentForm(request.REQUEST)
        if not form.is_valid():
            return throw_error(response, code=API_EXCEPTION_FORMERRORS, error=form.errors)

        response['presets'] = {}
        for field in ('language', 'ref_date', 'title'):
            response['presets'][field] = form.cleaned_data[field]

    raw_tags = request.REQUEST.get('tags', None)
    if raw_tags is not None:
        if 'presets' not in response:
            response['presets'] = {}

        try:
            response['presets']['tags'] = json.loads(raw_tags)
        except Exception as exc:
            return throw_error(response, "Exception: %s" % exc, code=API_EXCEPTION)

        for tag in response['presets']['tags']:
            form = TagForm(tag)

            if form.is_valid():
                response['message'] = 'form is valid!!!'
                t = form.save()
            elif "__all__" in form.errors:
                # the form rejected the tag as a whole: reuse the stored one
                try:
                    t = Tag.objects.get(name=tag['name'], type=tag['type'])
                except Exception as exc:
                    return throw_error(response, "Exception: %s" % exc, code=API_EXCEPTION)
            else:
                return throw_error(response, error=form.errors, code=API_EXCEPTION_FORMERRORS)

            tag['id'] = t.id
def download_document(request, document_id):
    """
    Send the stored file of a document as an attachment.

    request     -- the current request (only used to build error responses).
    document_id -- id handed to _get_document.

    Returns an HttpResponse whose body is the file content, typed with the
    document's mime_type and named "document_<id>", or the throw_error
    response when the document or its file on disk is missing.
    """
    d = _get_document(document_id)
    if d is None:
        return throw_error(_json(request, enable_method=False), "dcument does not exist...")

    filename = settings.MEDIA_ROOT + d.corpus.name + "/" + os.path.basename(d.url.path)
    if not os.path.exists(filename):
        return throw_error(_json(request, enable_method=False), "dcument does not exist...")

    # binary mode: the stored file can be of any type; the handle is closed
    # as soon as the content has been read
    with open(filename, 'rb') as f:
        content = f.read()

    response = HttpResponse(content, content_type=d.mime_type)
    response['Content-Disposition'] = 'attachment;filename="document_%s"' % d.id
    # length of what is actually sent
    response['Content-length'] = len(content)
    return response
def update_similarity(request, corpus_id):
    """
    Start a 'RELSy' routine for the given corpus.

    The corpus is loaded by primary key and handed to
    distiller.start_routine. If either step raises, the throw_error
    response is returned; otherwise the visible code falls through without
    an explicit return value.
    """
    response = _json(request, enable_method=False)

    from distiller import start_routine

    try:
        c = Corpus.objects.get(pk=corpus_id)
        routine = start_routine(type='RELSy', corpus=c)
    except Exception as exc:
        message = "Exception: %s" % exc
        return throw_error(response, error=message, code=API_EXCEPTION_DOESNOTEXIST)
def attach_free_tag(request, document_id):
    """
    This function requires name and type given as args.

    The visible part only loads the Document by primary key and returns
    the throw_error response when that lookup raises.
    """
    response = _json(request, enable_method=False)

    try:
        d = Document.objects.get(pk=document_id)
    except Exception as exc:
        message = "Exception: %s" % exc
        return throw_error(response, message, code=API_EXCEPTION_DOESNOTEXIST)
def relations_graph(request, corpus_id):
    """
    Prepare the inputs of the relations graph of a corpus.

    Stores the corpus JSON under response['corpus'], sets up the two basic
    SQL conditions on corpus id and, when request filters are present,
    collects the ids (as strings) of the documents of the corpus matching
    them. Returns a throw_error response when the corpus is unknown or the
    document lookup raises; otherwise the visible code falls through
    without an explicit return value.
    """
    response = _json(request)

    c = _get_corpus(corpus_id)
    if c is None:
        return throw_error(
            response,
            "Corpus %s does not exist...," % corpus_id,
            code=API_EXCEPTION_DOESNOTEXIST
        )

    response['corpus'] = c.json()

    # 0. BASIC filters for django queryset
    filters = ["d1.corpus_id=%s", "d2.corpus_id=%s"]
    ids = []

    # 1. handle filters via get
    if len(response['meta']['filters']):
        try:
            matching = Document.objects.filter(corpus=c, **response['meta']['filters'])
            ids = [str(d.id) for d in matching]
        except Exception as exc:
            return throw_error(response, error="Exception: %s" % exc, code=API_EXCEPTION)
def get_corpus(request, corpus_id):
    """
    Return the JSON description of a corpus.

    request   -- the current request.
    corpus_id -- value matched against the corpus *name* field.

    Returns the rendered JSON response with the corpus under 'corpus', or
    the throw_error response (with 'corpus' set to None) when the lookup
    or its serialisation raises.
    """
    response = _json(request)

    try:
        response['corpus'] = Corpus.objects.get(name=corpus_id).json()
    except Exception:
        # a bare "except:" would also swallow SystemExit / KeyboardInterrupt
        response['corpus'] = None
        return throw_error(response, "corpus does not exist...")

    return render_to_json(response)
def start_metrics(request, corpus_id):
    """
    Store a standard ("ST") analysis for a corpus and push its documents.

    Returns a throw_error response when the corpus is unknown or when
    pushdocs raises; in the latter case the analysis is first saved with
    status "ERR". Otherwise the visible code falls through without an
    explicit return value.
    """
    from utils import pushdocs
    from ampoule import decant

    response = _json(request, enable_method=False)

    c = _get_corpus(corpus_id)
    if c is None:
        # do sync
        return throw_error(
            response,
            "Corpus %s does not exist...," % corpus_id,
            code=API_EXCEPTION_DOESNOTEXIST
        )

    # standard analysis includes: metrics
    a = _store_analysis(corpus=c, type="ST")

    # pushdocs
    corpus_folder = settings.MEDIA_ROOT + c.name
    try:
        a = pushdocs(corpus=c, analysis=a, path=corpus_folder)
    except Exception as exc:
        # flag the stored analysis as failed before reporting the error
        a.status = "ERR"
        a.save()
        return throw_error(response, "Exception: %s " % exc, code=API_EXCEPTION_DOESNOTEXIST)
def streamgraph(request, corpus_id):
    """
    Collect, per 'actor' tag, the concepts used in the documents of a corpus.

    When response['meta'] carries filters, only the documents of the corpus
    matching them are considered; if none matches, an empty result is
    rendered straight away. The executed SQL is echoed under
    response['query'], the rows are grouped by tag name under
    response['actors'] and response['meta']['total'] holds the row count.

    Returns the rendered JSON response, or the throw_error response when
    the corpus is unknown.
    """
    response = _json(request)

    c = _get_corpus(corpus_id)
    if c is None:
        return throw_error(
            response,
            "Corpus %s does not exist...," % corpus_id,
            code=API_EXCEPTION_DOESNOTEXIST
        )

    from django.db import connection

    filters = ""
    if "filters" in response['meta']:
        matching = Document.objects.filter(corpus__id=corpus_id, **response['meta']['filters'])
        ids = [str(d.id) for d in matching]

        if len(ids) == 0:
            # no document matches: nothing to aggregate
            response['meta']['total'] = 0
            response['actors'] = {}
            return render_to_json(response)

        filters = " AND d.id IN ( %s )" % ",".join(ids)

    query = (
        " SELECT t.name, s.stemmed as concept, MAX(ds.tfidf), AVG(tf),"
        " count( DISTINCT s.id ) as distro"
        " FROM `anta_document_segment` ds"
        " JOIN anta_segment s ON s.id = ds.segment_id"
        " JOIN anta_document d ON d.id = ds.document_id"
        " JOIN anta_document_tag dt ON dt.document_id = ds.document_id"
        " JOIN anta_tag t ON t.id = dt.tag_id"
        " WHERE d.corpus_id = %s "
    ) + filters + (
        " AND t.type='actor'"
        " GROUP BY t.id, concept ORDER BY `distro` DESC "
    )
    response['query'] = query

    cursor = connection.cursor()
    cursor.execute(query, [corpus_id])
    rows = cursor.fetchall()

    # group the rows by tag name
    response['actors'] = {}
    for row in rows:
        response['actors'].setdefault(row[0], []).append({
            'concept': row[1],
            'tfidf': row[2],
            'tf': row[3],
            'f': row[4]
        })

    response['meta']['total'] = len(rows)
    return render_to_json(response)
def corpus(request, id):
    """
    Return a single corpus and delete it when the method is DELETE.

    The JSON form of the corpus is captured under response['results']
    before any deletion. Returns the rendered JSON response, or the
    throw_error response when the corpus is unknown.
    """
    response = _json(request)

    # all documents
    c = _get_corpus(id)
    if c is None:
        return throw_error(
            response,
            "Corpus %s does not exist...," % id,
            code=API_EXCEPTION_DOESNOTEXIST
        )

    response['results'] = [c.json()]

    is_delete = response['meta']['method'] == 'DELETE'
    if is_delete:
        c.delete()

    return render_to_json(response)
def create_relation(request, response):
    """
    Create a Relation owned by the requesting user from ApiRelationForm.

    Returns the rendered JSON response with the new relation under
    'created', or the throw_error response carrying the form errors when
    validation fails.
    """
    response['owner'] = request.user.json()

    form = ApiRelationForm(request.REQUEST)
    if not form.is_valid():
        return throw_error(response, error=form.errors, code=API_EXCEPTION_FORMERRORS)

    data = form.cleaned_data
    r = Relation(
        source=data['source'],
        target=data['target'],
        polarity=data['polarity'],
        description=data['description'],
        owner=request.user
    )
    r.save()

    response['created'] = r.json()
    return render_to_json(response)
def tfidf(request, corpus_id):
    """
    START the classic tfidf extraction.
    Open related sub-process with routine id. Return the routine created.

    The visible part only loads the corpus by primary key and returns the
    throw_error response when that lookup raises.
    """
    from distiller import start_routine, stop_routine
    import subprocess
    import sys

    response = _json(request, enable_method=False)

    try:
        c = Corpus.objects.get(pk=corpus_id)
    except Exception as exc:
        message = "Exception: %s" % exc
        return throw_error(response, error=message, code=API_EXCEPTION_DOESNOTEXIST)
def relations(request):
    """
    List relations, or create one when the method is POST.

    request -- the current request. POST delegates to create_relation.
               With a 'corpus' parameter (a corpus name) only the relations
               whose source and target both belong to that corpus are
               listed, paginated with the 'offset' / 'limit' entries of
               response['meta']. Without it the listing is delegated to
               _get_instances.

    Returns the response of the delegated helper, the rendered JSON
    response for the per-corpus listing, or the throw_error response when
    the corpus lookup raises.
    """
    response = _json(request)

    # create a relation
    if response['meta']['method'] == 'POST':
        return create_relation(request, response)

    if 'corpus' in request.REQUEST:
        # The parameter was never read before: the bare name `corpus`
        # resolved to the module-level view function of that name, so the
        # lookup could not match any corpus.
        corpus = request.REQUEST.get('corpus')

        try:
            response['corpus'] = Corpus.objects.get(name=corpus).json()
        except Exception:
            return throw_error(response, error="aje, corpus does not exist...")

        in_corpus = Relation.objects.filter(
            source__corpus__name=corpus,
            target__corpus__name=corpus
        )
        response['meta']['total'] = in_corpus.count()

        # 'limit' is a page size, so the slice must end at offset + limit
        # (same convention as the other paginated listings of this module)
        start = response['meta']['offset']
        stop = start + response['meta']['limit']
        response['results'] = [r.json() for r in in_corpus[start:stop]]
        return render_to_json(response)

    return _get_instances(request, response, model_name="Relation")
def _get_instances(request, response, model_name, app_name="anta"):
    """
    Fill the response with a filtered, ordered page of model instances.

    The model is resolved through get_model(app_name, model_name).
    response['meta']['total'] receives the unfiltered object count and
    response['results'] the JSON form of the instances selected by the
    'filters', 'order_by', 'offset' and 'limit' entries of response['meta'].
    Returns the throw_error response if anything in that step raises;
    otherwise the visible code falls through without an explicit return
    value.
    """
    from django.db.models.loading import get_model
    from django.db.models import Q

    m = get_model(app_name, model_name)

    # get total objects
    total = m.objects.count()
    meta = response['meta']
    meta['total'] = total

    try:
        # has OR clause (does not handle filters )
        # The OR handling below is disabled; the lookup itself is kept so a
        # missing 'queries' entry still ends in the error response.
        if meta['queries'] is not None:
            #queries = reduce(operator.or_, [Q(x) for x in response['meta']['queries']])
            #response['results'] = [i.json() for i in m.objects.filter( queries, **response['meta']['filters']).order_by(*response['meta']['order_by'])[ response['meta']['offset']: response['meta']['offset'] + response['meta']['limit'] ] ]
            pass

        selected = m.objects.filter(**meta['filters']).order_by(*meta['order_by'])
        start = meta['offset']
        stop = meta['offset'] + meta['limit']
        response['results'] = [i.json() for i in selected[start:stop]]
    except Exception as exc:
        return throw_error(response, error="Exception: %s" % exc, code=API_EXCEPTION_EMPTY)
return throw_error( response, "Exception: %s" % e, code=API_EXCEPTION_DOESNOTEXIST ) response['results'] = [r.json()] if response['meta']['method'] == 'DELETE': r.delete() return render_to_json( response ) # create documents if response['meta']['method'] == 'POST': ## DOES NOT WORK. whiy? form = ApiRelationForm( request.REQUEST, instance=r ) if not form.is_valid(): return throw_error( response, error=form.errors, code=API_EXCEPTION_FORMERRORS ) form.save(commit=False) r.creation_date = datetime.now() r.owner = request.user r.save() r = Relation.objects.get(pk=id) response['results'] = [r.json()] return render_to_json( response ) # return create_relation( request, response ) # if method is POST, update the relation """
def login_requested(request):
    """
    Build the error response telling the client it is not authenticated.
    """
    return throw_error(
        _json(request),
        error="you're not authenticated",
        code="auth failed"
    )
def access_denied(request):
    """
    Build the error response telling the client that access is denied.
    """
    return throw_error(
        _json(request),
        error="access denied",
        code="forbidden"
    )
def _delete_instance( request, response, instance, attachments=[] ):
    """
    Delete a model instance.

    Returns the throw_error response when instance.delete() raises;
    otherwise the visible code falls through without an explicit return
    value. `attachments` is not used in the visible part.
    """
    try:
        instance.delete()
    except Exception as exc:
        message = "Exception: %s" % exc
        return throw_error(response, error=message, code=API_EXCEPTION_EMPTY)
def detach_tag(request, document_id, tag_id):
    """
    Load the document a tag is to be detached from.

    The visible part only fetches the Document by primary key and returns
    the throw_error response when that lookup raises; tag_id is not used
    in the visible part.
    """
    response = _json(request, enable_method=False)

    try:
        d = Document.objects.get(pk=document_id)
    except Exception as exc:
        message = "Exception: %s" % exc
        return throw_error(response, message, code=API_EXCEPTION_DOESNOTEXIST)