Example #1
0
def documents(request, corpus_name=None):
	"""
	Resolve the corpus targeted by a documents request.

	Expects a `corpus` request parameter that is used as the Corpus id.
	Returns the result of throw_error() when the parameter is missing or
	when the corpus lookup raises. `corpus_name` is accepted but not read
	by the code below.

	NOTE(review): nothing is returned once the corpus has been resolved;
	the body appears to stop right after the lookup -- confirm against the
	complete view.
	"""
	response = _json( request )

	# the corpus id is mandatory
	if 'corpus' not in request.REQUEST:
		return throw_error( response, error="corpus param is empty", code=API_EXCEPTION_INCOMPLETE)

	try:
		corpus = Corpus.objects.get(id=request.REQUEST.get('corpus'))
	except Exception as e:
		return throw_error( response, error="Exception: %s " % e, code=API_EXCEPTION_DOESNOTEXIST )
Example #2
0
def segments_import( request, corpus_id ):
	"""
	Check the preconditions for importing segments into a corpus.

	Verifies that the hard-coded working directory exists and that a Corpus
	with primary key `corpus_id` can be loaded; either failure is reported
	through throw_error().

	NOTE(review): nothing is returned after a successful lookup; the body
	appears to stop here -- confirm against the complete view.
	"""
	response = _json( request,  enable_method=False )
	path = "/tmp/"

	# the working path is always exposed in the payload (debug aid)
	response['path'] = path

	if not os.path.exists( path ):
		return throw_error(response, code=API_EXCEPTION_DOESNOTEXIST, error="path %s does not exits!" % path )

	try:
		c = Corpus.objects.get(pk=corpus_id)
	except Exception as e:
		return throw_error( response, error="Exception: %s" % e, code=API_EXCEPTION_DOESNOTEXIST )
Example #3
0
def create_corpus( request, response ):
	"""
	Create a corpus, its folder under MEDIA_ROOT and its Owners record.

	The request data is validated with ApiCorpusForm. On success the new
	corpus is serialised under response['created'] and the response is
	rendered as JSON. Validation errors and any exception raised while
	creating the folder or saving the records are reported through
	throw_error().
	"""
	response['owner'] = request.user.json()

	form = ApiCorpusForm( request.REQUEST, initial={'owner':request.user.id} )
	if not form.is_valid():
		# previously this path fell through and the view returned None
		return throw_error( response, error=form.errors, code=API_EXCEPTION_FORMERRORS )

	# basename() keeps the folder directly below MEDIA_ROOT
	corpus_path = settings.MEDIA_ROOT + os.path.basename( form.cleaned_data['name'] )
	response['corpus_path'] = corpus_path

	try:
		# create folder if it does not exist
		if not os.path.exists( corpus_path ):
			os.makedirs( corpus_path )

		c = Corpus( name=form.cleaned_data['name'] )
		c.save()

		o = Owners( corpus=c, user=request.user )
		o.save()
	except Exception as e:
		return throw_error( response, error="Exception: %s" % e, code="fault" )

	response['created'] = c.json()
	return render_to_json( response )
Example #4
0
def _save_or_die( model_name, response, app_name="anta", filters=None):
	"""
	Instantiate `app_name.model_name` with `filters` as keyword arguments
	and save it.

	Returns whatever the instance's save() returns, or the result of
	throw_error() if instantiation or saving raises.
	"""
	from django.db.models.loading import get_model
	m = get_model(app_name,model_name)
	try:
		# `filters or {}` replaces the former shared mutable default
		return m( **(filters or {}) ).save()
	except Exception as e:
		return throw_error( response, error="Exception: %s" % e, code=API_EXCEPTION_EMPTY )
Example #5
0
def segment_stems( request, corpus_id=None ):
	"""
	Prepare the aggregate query listing stems with tf / tfidf statistics.

	When `corpus_id` is given, the corpus must exist (otherwise the result
	of throw_error() is returned) and the query is restricted to it through
	a bound `d.corpus_id = %s` condition.

	NOTE(review): the query, `where` and `binds` are built but not executed
	in the code below; the body appears to stop here -- confirm against the
	complete view.
	"""
	response = _json( request )

	# split order_by stuff
	# order_by = ["tfidf DESC","tfidf ASC","distribution ASC", "distribution DESC"]
	basic_query = """
		SELECT 
			s.stemmed as content, GROUP_CONCAT( s.content ) as sample, 
			AVG( ds.tfidf ) as avg_tfidf, MAX( ds.tfidf ) as max_tfidf, MIN( ds.tfidf ) as min_tfidf,
			AVG( ds.tf ) as avg_tf, MAX( ds.tf ) as max_tf, MIN( ds.tf ) as min_tf,
			COUNT( distinct ds.document_id ) as distribution,
			COUNT( distinct s.id ) as aliases FROM anta_segment s 
			JOIN anta_document_segment ds ON s.id = ds.segment_id
			JOIN anta_document d ON d.id = ds.document_id
		"""

	where = []
	binds = []

	if corpus_id is not None:
		try:
			# existence check only; the instance itself is not needed
			Corpus.objects.get(pk=corpus_id)
		except Exception as e:
			return throw_error( response, error="Exception: %s" % e, code=API_EXCEPTION_DOESNOTEXIST )
		where.append("d.corpus_id = %s")
		binds.append(corpus_id)
Example #6
0
def relation( request, id ):
	"""
	Load the Relation whose id is `id`.

	Returns the result of throw_error() when the lookup raises.

	NOTE(review): nothing is returned after a successful lookup; the body
	appears to stop here -- confirm against the complete view.
	"""
	response = _json( request )
	try:
		r =  Relation.objects.get(id=id)
	except Exception as e:
		return throw_error( response, "Exception: %s" % e, code=API_EXCEPTION_DOESNOTEXIST )
Example #7
0
def segments_export( request, corpus_id ):
	"""
	Export the segments of a corpus as a UTF-8 CSV attachment.

	One row is written per segment with its id, content, stemmed form,
	document count, max tf and max tfidf. Returns the result of
	throw_error() when _get_corpus() yields None for `corpus_id`.
	"""
	c =  _get_corpus( corpus_id )
	if c is None:
		return throw_error( _json( request, enable_method=False ), error="corpus id %s does not exist..." % corpus_id, code=API_EXCEPTION_DOESNOTEXIST )
	import unicodecsv
	ss = Segment.objects.raw("""
		SELECT 
			`anta_segment`.`id`, `anta_segment`.`content`, `anta_segment`.`language`, 
			`anta_segment`.`stemmed`, `anta_segment`.`status`, 
			MAX(`anta_document_segment`.`tfidf`) AS `max_tfidf`,
			MAX(`anta_document_segment`.`tf`) AS `max_tf`, 
			COUNT(`anta_document_segment`.`document_id`) AS `distro` 
		FROM `anta_segment`
			JOIN `anta_document_segment` ON (`anta_segment`.`id` = `anta_document_segment`.`segment_id`) 
			JOIN `anta_document` ON (`anta_document_segment`.`document_id` = `anta_document`.`id`) 
		WHERE `anta_document`.`corpus_id` = %s AND content NOT REGEXP '^[[:alpha:]][[:punct:]]$'
		GROUP BY `anta_segment`.`id`
		""",[corpus_id]
	)

	# content_type is the supported keyword; mimetype was only a deprecated alias
	response = HttpResponse(content_type='text/csv; charset=utf-8')
	response['Content-Description'] = "File Transfer"
	response['Content-Disposition'] = "attachment; filename=%s.csv" % c.name
	writer = unicodecsv.writer(response, encoding='utf-8')

	# headers
	writer.writerow(['segment_id', 'content', 'concept', 'distribution', 'max_tf', 'max_tfidf'])

	for s in ss:
		writer.writerow([  s.id, s.content, s.stemmed, s.distro,  s.max_tf, s.max_tfidf])

	return response
Example #8
0
def pending_analysis_corpus( request, corpus_id ):
	"""
	List the analyses of a corpus that have no end date, newest id first.

	The page is taken from response['meta'] (`offset`, `limit`) and stored
	under response['objects']. Any exception is reported through
	throw_error().

	NOTE(review): nothing is returned on success; the body appears to stop
	here -- confirm against the complete view.
	"""
	response = _json( request )

	offset = response['meta']['offset']
	limit = response['meta']['limit']

	try:
		pending = Analysis.objects.filter( corpus__id = corpus_id, end_date = None ).order_by( "-id" )
		response['objects'] = [ a.json() for a in pending[ offset: offset + limit ] ]
	except Exception as e:
		# was `Eception`, which raised NameError instead of reporting the error
		return throw_error( response, error="Exception thrown: %s" % e, code=API_EXCEPTION_DOESNOTEXIST )
Example #9
0
def document(request, document_id):
	"""
	Read, update or delete a single document.

	- DELETE: delegates to _delete_instance(), passing the stored file and
	  the textified version as attachments.
	- POST: validates UpdateDocumentForm and overwrites title, ref_date and
	  language only with non-empty submitted values.
	- When 'with-text' is present in response['meta'], the text version is
	  read and returned under response['text'].

	Returns the rendered JSON response, or the result of throw_error() when
	the document is missing, the form is invalid or no text version exists.
	"""
	response = _json( request )

	d = _get_document( document_id )
	if d is None:
		return throw_error( response, "document %s does not exist..." % document_id, code=API_EXCEPTION_DOESNOTEXIST)

	method = response['meta']['method']

	# delete a document
	if method == 'DELETE':
		return _delete_instance( request, response, instance=d, attachments=[
			os.path.join(settings.MEDIA_ROOT, d.corpus.name, os.path.basename(d.url.url)),
			textify( d, settings.MEDIA_ROOT )
		])

	# if method is POST, update the document
	if method == 'POST':
		form = UpdateDocumentForm( request.REQUEST )
		if not form.is_valid():
			return throw_error( response, error=form.errors, code=API_EXCEPTION_FORMERRORS )

		data = form.cleaned_data
		# empty / missing values leave the stored field untouched
		if data['title']:
			d.title = data['title']
		if data['ref_date'] is not None:
			d.ref_date = data['ref_date']
		if data['language']:
			d.language = data['language']
		d.save()

	# load text only if it's required
	if 'with-text' in response['meta']:
		text = textify( d, settings.MEDIA_ROOT )

		if text is None:
			return throw_error( response, "unable to provide txt version of the document")

		# context manager closes the handle even if read() fails
		with open(text, 'r') as f:
			response['text'] = f.read()

	response['results'] = [ d.json() ]

	return render_to_json( response )
Example #10
0
def _start_process( popen_args, routine, response ):
	"""
	Launch `popen_args` as a child process without waiting for it.

	The routine is serialised under response['routine'] before the launch.
	Returns the result of throw_error() if the process cannot be started.

	NOTE(review): nothing is returned on success; the body appears to stop
	here -- confirm against the complete helper.
	"""
	import subprocess

	response['routine'] = routine.json()

	try:
		subprocess.Popen(popen_args, stdout=None, stderr=None)
	except Exception as e:
		return throw_error(response, error="Exception: %s" % e, code=API_EXCEPTION)
Example #11
0
def create_document( request, response, corpus ):
	"""
	Collect and validate the presets for documents added to `corpus`.

	- The corpus folder (MEDIA_ROOT + corpus name) must already exist.
	- If `language` is submitted, UpdateDocumentForm is validated and its
	  language, ref_date and title are stored in response['presets'].
	- If `tags` is submitted, it is parsed as JSON; every tag is saved
	  through TagForm, or loaded by name and type when the form reports
	  only "__all__" errors. The tag id is written back into the tag.

	Each failure is reported through throw_error().

	NOTE(review): nothing is returned after the tags have been processed;
	the body appears to stop here -- confirm against the complete view.
	"""
	path = settings.MEDIA_ROOT + corpus.name + "/"

	# the target path is always exposed in the payload (debug aid)
	response['path'] = path

	if not os.path.exists( path ):
		return throw_error(response, code=API_EXCEPTION_DOESNOTEXIST, error="path %s does not exits!" % path )

	# check preloaded vars
	if request.REQUEST.get('language', None) is not None:
		form = UpdateDocumentForm( request.REQUEST )
		if not form.is_valid():
			return throw_error(response, code=API_EXCEPTION_FORMERRORS, error=form.errors)
		response['presets'] = {
			'language': form.cleaned_data['language'],
			'ref_date': form.cleaned_data['ref_date'],
			'title': form.cleaned_data['title'],
		}

	if request.REQUEST.get('tags', None) is not None:
		if 'presets' not in response:
			response['presets'] = {}
		try:
			response['presets']['tags'] = json.loads( request.REQUEST.get('tags') )
		except Exception as e:
			return throw_error( response, "Exception: %s" % e, code=API_EXCEPTION )

		for tag in response['presets']['tags']:
			form = TagForm( tag )
			if form.is_valid():
				response['message'] = 'form is valid!!!'
				t = form.save()
			elif "__all__" in form.errors:
				# non-field errors: fall back to the tag with this name and type
				try:
					t = Tag.objects.get(name=tag['name'],type=tag['type'] )
				except Exception as e:
					return throw_error( response, "Exception: %s" % e, code=API_EXCEPTION )
			else:
				return throw_error( response, error=form.errors, code=API_EXCEPTION_FORMERRORS )
			tag['id'] = t.id
Example #12
0
def download_document(request, document_id):
	"""
	Send the stored file of a document as an attachment.

	The file is looked up under MEDIA_ROOT/<corpus name>/ using the base
	name of d.url.path. Returns the result of throw_error() when the
	document or the file is missing.
	"""
	d = _get_document( document_id )
	if d is None:
		return throw_error( _json( request, enable_method=False ), "dcument does not exist...")

	filename = settings.MEDIA_ROOT + d.corpus.name + "/" + os.path.basename( d.url.path )

	if not os.path.exists( filename ):
		return throw_error( _json( request, enable_method=False ), "dcument does not exist...")

	# binary mode: the payload is served as-is, whatever its mime type;
	# the context manager releases the handle once the content is loaded
	with open( filename, 'rb' ) as f:
		content = f.read()

	response = HttpResponse( content, content_type=d.mime_type)
	response['Content-Disposition']='attachment;filename="document_%s"'%d.id
	response['Content-length'] = len( content )
	return response
Example #13
0
def update_similarity( request, corpus_id ):
	"""
	Start a 'RELSy' routine for the corpus with primary key `corpus_id`.

	A failure of either the corpus lookup or start_routine() is reported
	through throw_error() with the does-not-exist code.

	NOTE(review): nothing is returned once the routine has been started;
	the body appears to stop here -- confirm against the complete view.
	"""
	response = _json( request, enable_method=False )

	from distiller import start_routine

	try:
		c = Corpus.objects.get(pk=corpus_id)
		routine = start_routine( type='RELSy', corpus=c )
	except Exception as e:
		return throw_error( response, error="Exception: %s" % e, code=API_EXCEPTION_DOESNOTEXIST )
Example #14
0
def attach_free_tag( request, document_id ):
	"""
	This function requires name and type given as args.

	Loads the Document with primary key `document_id`; a failed lookup is
	reported through throw_error().

	NOTE(review): nothing is returned after a successful lookup; the body
	appears to stop here -- confirm against the complete view.
	"""
	response = _json( request, enable_method=False )

	try:
		d = Document.objects.get(pk=document_id)
	except Exception as e:
		return throw_error( response, "Exception: %s" % e, code=API_EXCEPTION_DOESNOTEXIST )
Example #15
0
def relations_graph(request, corpus_id):
	"""
	Prepare the filters for the relations graph of a corpus.

	The corpus is serialised under response['corpus']. When
	response['meta']['filters'] is not empty, the ids of the matching
	documents of the corpus are collected as strings. A missing corpus or
	a failing document query is reported through throw_error().

	NOTE(review): `filters` and `ids` are built but not used in the code
	below, and nothing is returned on success; the body appears to stop
	here -- confirm against the complete view.
	"""
	response = _json( request )

	c =  _get_corpus( corpus_id )
	if c is None:
		return throw_error( response, "Corpus %s does not exist...," % corpus_id, code=API_EXCEPTION_DOESNOTEXIST )
	response['corpus'] = c.json()

	# 0. BASIC filters for django queryset
	filters = ["d1.corpus_id=%s", "d2.corpus_id=%s"]
	ids = []

	# 1. handle filters via get
	if len( response['meta']['filters'] ):
		try:
			ids = [ str(d.id) for d in Document.objects.filter(corpus=c,**response['meta']['filters'])]
		except Exception as e:
			return throw_error( response, error="Exception: %s" % e, code=API_EXCEPTION )
Example #16
0
def get_corpus(request, corpus_id ):
	"""
	Return the JSON description of a corpus.

	The corpus is looked up with `name=corpus_id`. On failure
	response['corpus'] is set to None and the result of throw_error() is
	returned.

	NOTE(review): the lookup matches `corpus_id` against the corpus name,
	not its primary key -- confirm this is intended.
	"""
	response = _json( request )

	try:
		response['corpus'] = Corpus.objects.get(name=corpus_id).json()
	except Exception:
		# a bare except would also swallow SystemExit / KeyboardInterrupt
		response['corpus'] = None
		return throw_error( response, "corpus does not exist...")

	return render_to_json( response )
Example #17
0
def start_metrics( request, corpus_id):
	"""
	Start a standard ("ST") analysis on a corpus and push its documents.

	If pushdocs() raises, the analysis status is set to "ERR", saved, and
	the result of throw_error() is returned. A missing corpus is reported
	the same way.

	NOTE(review): `decant` is imported but not used in the code below, and
	nothing is returned on success; the body appears to stop here --
	confirm against the complete view.
	"""
	from utils import pushdocs
	from ampoule import decant
	response = _json( request, enable_method=False )

	c =  _get_corpus( corpus_id )

	if c is None:
		return throw_error( response, "Corpus %s does not exist...," % corpus_id, code=API_EXCEPTION_DOESNOTEXIST )

	# standard analysis includes: metrics
	a = _store_analysis( corpus=c, type="ST" )

	# pushdocs
	try:
		a = pushdocs( corpus=c, analysis=a, path=settings.MEDIA_ROOT+c.name)
	except Exception as e:
		# `a` is still the stored analysis: the assignment above never completed
		a.status = "ERR"
		a.save()
		return throw_error( response, "Exception: %s " % e, code=API_EXCEPTION_DOESNOTEXIST )
Example #18
0
def streamgraph( request, corpus_id ):
	"""
	Group the stemmed concepts of a corpus by 'actor' tag.

	response['actors'] maps each tag name to a list of dicts with the keys
	'concept', 'tfidf', 'tf' and 'f'; response['meta']['total'] holds the
	number of rows read. When 'filters' is present in response['meta'],
	only the documents matching them are considered; if none match, an
	empty result is rendered straight away. The executed SQL is exposed
	under response['query'].
	"""
	response = _json( request )
	c = _get_corpus( corpus_id )
	if c is None:
		return throw_error( response, "Corpus %s does not exist...," % corpus_id, code=API_EXCEPTION_DOESNOTEXIST )	
	from django.db import connection

	id_clause = ""
	if "filters" in response['meta']:
		matching = Document.objects.filter(corpus__id=corpus_id,**response['meta']['filters'])
		doc_ids = [ str(doc.id) for doc in matching ]
		if len(doc_ids) == 0:
			# nothing matches the filters: answer without touching the cursor
			response['meta']['total'] = 0
			response['actors'] = {}
			return render_to_json( response )
		id_clause = " AND d.id IN ( %s )" % ",".join(doc_ids)

	query = """
		SELECT 
	    	t.name,  s.stemmed as concept, MAX(ds.tfidf), AVG(tf),
			count( DISTINCT s.id ) as distro 
		FROM `anta_document_segment` ds
			JOIN anta_segment s ON s.id = ds.segment_id
			JOIN anta_document d ON d.id = ds.document_id
			JOIN anta_document_tag dt ON dt.document_id = ds.document_id 
			JOIN anta_tag t ON t.id = dt.tag_id 
			
		WHERE d.corpus_id = %s """ + id_clause + """ AND t.type='actor'
		GROUP BY t.id, concept  ORDER BY `distro` DESC
		"""
	response['query'] = query
	cursor = connection.cursor()
	cursor.execute( query, [corpus_id] )

	response['actors'] = {}
	total = 0
	for row in cursor.fetchall():
		actor_name = row[0]
		entry = {
			'concept':row[1],
			'tfidf':row[2],
			'tf':row[3],
			'f':row[4]
		}
		if actor_name in response['actors']:
			response['actors'][ actor_name ].append( entry )
		else:
			response['actors'][ actor_name ] = [ entry ]
		total = total + 1

	response['meta']['total'] = total

	return render_to_json( response )
Example #19
0
def corpus( request, id ):
	"""
	Return a corpus as JSON and delete it when the method is DELETE.

	The corpus is serialised into response['results'] before it is
	deleted, so the response still describes the removed corpus. A missing
	corpus is reported through throw_error().
	"""
	response = _json( request )

	found = _get_corpus( id )
	if found is None:
		return throw_error( response, "Corpus %s does not exist...," % id, code=API_EXCEPTION_DOESNOTEXIST )

	serialised = found.json()
	response['results'] = [ serialised ]

	if response['meta']['method'] == 'DELETE':
		found.delete()

	return render_to_json( response )
Example #20
0
def create_relation( request, response ):
	"""
	Create a Relation owned by the requesting user.

	The request data is validated with ApiRelationForm. On success the new
	relation is serialised under response['created'] and rendered as JSON;
	otherwise the form errors are reported through throw_error().
	"""
	response['owner'] = request.user.json()

	form = ApiRelationForm( request.REQUEST )
	if not form.is_valid():
		return throw_error( response, error=form.errors, code=API_EXCEPTION_FORMERRORS )

	data = form.cleaned_data
	new_relation = Relation(
		source=data['source'],
		target=data['target'],
		polarity=data['polarity'],
		description=data['description'],
		owner=request.user
	)
	new_relation.save()

	response['created'] = new_relation.json()
	return render_to_json( response )
Example #21
0
def tfidf( request, corpus_id ):
	"""
	START the classic tfidf extraction.
	Open related sub-process with routine id.
	Return the routine created.

	NOTE(review): the code below only loads the corpus (reporting a failed
	lookup through throw_error()); the routine and sub-process handling
	described above are not visible here -- confirm against the complete
	view.
	"""
	from distiller import start_routine, stop_routine
	import subprocess, sys

	response = _json( request, enable_method=False )

	try:
		c = Corpus.objects.get(pk=corpus_id)
	except Exception as e:
		return throw_error( response, error="Exception: %s" % e, code=API_EXCEPTION_DOESNOTEXIST )
Example #22
0
def relations( request ):
	"""
	List relations, or create one when the method is POST.

	- POST: delegates to create_relation().
	- With a `corpus` request parameter (a corpus name): returns the
	  relations whose source and target both belong to that corpus, paged
	  with response['meta'] `offset` and `limit`.
	- Otherwise: delegates to _get_instances() for the Relation model.
	"""
	response = _json( request )

	# create a relation
	if response['meta']['method'] == 'POST':
		return create_relation( request, response )

	if 'corpus' in request.REQUEST:
		# the name was previously never read from the request, so the lookup
		# below could not succeed
		corpus_name = request.REQUEST.get('corpus')
		try:
			response['corpus'] = Corpus.objects.get(name=corpus_name).json()
		except Exception:
			return throw_error( response, error="aje, corpus does not exist...")

		in_corpus = Relation.objects.filter( source__corpus__name=corpus_name, target__corpus__name=corpus_name)
		offset = response['meta']['offset']
		limit = response['meta']['limit']

		response['meta']['total'] = in_corpus.count()
		# the upper bound is offset + limit; using `limit` alone returned
		# short or empty pages as soon as offset > 0
		response['results'] = [ r.json() for r in in_corpus[ offset: offset + limit ] ]
		return render_to_json( response )

	return _get_instances( request, response, model_name="Relation" )
Example #23
0
def _get_instances( request, response, model_name, app_name="anta" ):
	"""
	Fill `response` with a filtered, ordered page of `app_name.model_name`.

	response['meta']['total'] receives the unfiltered object count;
	response['results'] receives the serialised page selected with the
	`filters`, `order_by`, `offset` and `limit` entries of
	response['meta']. Any exception raised while querying is reported
	through throw_error().

	NOTE(review): nothing is returned on success; the body appears to stop
	here -- confirm against the complete helper.
	"""
	from django.db.models.loading import get_model
	from django.db.models import Q
	m = get_model(app_name,model_name)

	# get total objects
	response['meta']['total'] = m.objects.count()

	try:
		# has OR clause (does not handle filters )
		if response['meta']['queries'] is not None:
			#queries = reduce(operator.or_, [Q(x) for x in response['meta']['queries']])
			#response['results'] = [i.json() for i in m.objects.filter( queries, **response['meta']['filters']).order_by(*response['meta']['order_by'])[ response['meta']['offset']: response['meta']['offset'] + response['meta']['limit'] ] ]

			pass

		response['results'] = [i.json() for i in m.objects.filter( **response['meta']['filters']).order_by(*response['meta']['order_by'])[ response['meta']['offset']: response['meta']['offset'] + response['meta']['limit'] ] ]
	except Exception as e:
		return throw_error( response, error="Exception: %s" % e, code=API_EXCEPTION_EMPTY )
Example #24
0
		return throw_error( response, "Exception: %s" % e, code=API_EXCEPTION_DOESNOTEXIST )	
	
	response['results'] = [r.json()]
	
	if response['meta']['method'] == 'DELETE':
		r.delete()		
		return render_to_json( response )

	# create documents
	if response['meta']['method'] == 'POST':
		## DOES NOT WORK. whiy? 
		form = ApiRelationForm( request.REQUEST, instance=r )
		
		if not form.is_valid():

			return throw_error( response, error=form.errors, code=API_EXCEPTION_FORMERRORS )	
		
		
		form.save(commit=False)
		r.creation_date = datetime.now()
		r.owner = request.user
		r.save()
		r = Relation.objects.get(pk=id)
		response['results'] = [r.json()]
		return render_to_json( response )

		# return create_relation( request, response )
	

	# if method is POST, update the relation
	"""
Example #25
0
def login_requested( request ):
	"""
	Answer with an "auth failed" error built by throw_error().
	"""
	return throw_error( _json( request ), error="you're not authenticated", code="auth failed" )
Example #26
0
def access_denied( request ):
	"""
	Answer with a "forbidden" error built by throw_error().
	"""
	return throw_error( _json( request ), error="access denied", code="forbidden" )
Example #27
0
def _delete_instance( request, response, instance, attachments=[] ):
	"""
	Delete `instance`, reporting any exception through throw_error().

	NOTE(review): `attachments` is not read in the code below (so its
	mutable default is never modified here) and nothing is returned on
	success; the body appears to stop here -- confirm against the complete
	helper.
	"""
	try:
		instance.delete()
	except Exception as e:
		return throw_error( response, error="Exception: %s" % e, code=API_EXCEPTION_EMPTY )
Example #28
0
def detach_tag( request, document_id, tag_id ):
	"""
	Load the Document with primary key `document_id`.

	A failed lookup is reported through throw_error().

	NOTE(review): `tag_id` is not used in the code below and nothing is
	returned on success; the body appears to stop here -- confirm against
	the complete view.
	"""
	response = _json( request, enable_method=False )
	try:
		d = Document.objects.get(pk=document_id)
	except Exception as e:
		return throw_error( response, "Exception: %s" % e, code=API_EXCEPTION_DOESNOTEXIST )