Code Example #1
File: idea.py Project: colinallen/inphosite
    def evaluate(self, id=None):
        if not h.auth.is_logged_in():
            abort(401)

        c.idea = h.fetch_obj(Idea, id, new_id=True)
        node_q = Session.query(Node).filter_by(concept_id=id)
        c.node = node_q.first()
        if request.environ.get('REMOTE_USER', False):
            user = h.get_user(request.environ['REMOTE_USER'])

            sq = Session.query(IdeaEvaluation.cons_id)
            sq = sq.filter(IdeaEvaluation.ante==c.idea)
            sq = sq.filter(IdeaEvaluation.uid==user.ID)
            sq = sq.subquery()

            to_evaluate = c.idea.related.outerjoin((sq, Idea.ID==sq.c.cons_id))
            to_evaluate = to_evaluate.filter(sq.c.cons_id==None)

        else:
            to_evaluate = c.idea.related

        c.paginator = paginate.Page(
            to_evaluate,
            page=int(request.params.get('page', 1)),
            items_per_page=10,
            controller='idea',
            action='edit',
            id=id
        )

        response.headers['Access-Control-Allow-Origin'] = '*' 

        return render('idea/idea-edit.html')
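
Note: the filter above is an anti-join: the subquery collects the consequent IDs this user has already evaluated, the outer join attaches them to the related ideas, and filter(sq.c.cons_id == None) keeps only the rows with no match. A minimal sketch of the same idiom, assuming an initialized inpho.model Session and existing Idea and user objects:

from inpho.model import Session, Idea, IdeaEvaluation

def unevaluated_related(idea, uid):
    # subquery: consequent ids this user has already evaluated
    sq = Session.query(IdeaEvaluation.cons_id)
    sq = sq.filter(IdeaEvaluation.ante == idea)
    sq = sq.filter(IdeaEvaluation.uid == uid)
    sq = sq.subquery()

    # outer join + IS NULL check keeps only unevaluated pairs
    pairs = idea.related.outerjoin((sq, Idea.ID == sq.c.cons_id))
    return pairs.filter(sq.c.cons_id == None)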
Code Example #2
File: taxonomy.py Project: inpho/inphosite
    def list(self, filetype="html"):
        c.nodes = Session.query(Node).all()

        entity_q = Session.query(Node)

        c.query = request.params.get("q", "")
        c.sep = request.params.get("sep", "")

        if request.params.get("sep_filter", False):
            entity_q = entity_q.filter(Entity.sep_dir != "")

        if c.sep:
            entity_q = entity_q.filter(Entity.sep_dir == c.sep)

        if c.query:
            o = or_(Entity.label.like(c.query + "%"), Entity.label.like("% " + c.query + "%"))
            entity_q = entity_q.filter(o).order_by(func.length(Entity.label))

        if filetype == "json":
            response.content_type = "application/json"
        response.headers["Access-Control-Allow-Origin"] = "*"

        # limit must be applied after all filters, or SQLAlchemy raises an error
        entity_q = entity_q.limit(request.params.get("limit", None))
        c.entities = entity_q.all()
        if request.params.get("redirect", False) and len(c.entities) == 1:
            h.redirect(
                h.url(controller=self._controller, action="view", filetype=filetype, id=c.entities[0].ID), code=302
            )
        else:
            return render("{type}/{type}-list.".format(type=self._controller) + filetype)
Code Example #3
File: idea.py Project: nessc/inphosite
    def evaluate(self, id=None):
        if not h.auth.is_logged_in():
            abort(401)

        c.idea = h.fetch_obj(Idea, id, new_id=True)
        node_q = Session.query(Node).filter_by(concept_id=id)
        c.node = node_q.first()
        if request.environ.get('REMOTE_USER', False):
            user = h.get_user(request.environ['REMOTE_USER'])

            sq = Session.query(IdeaEvaluation.cons_id)
            sq = sq.filter(IdeaEvaluation.ante == c.idea)
            sq = sq.filter(IdeaEvaluation.uid == user.ID)
            sq = sq.subquery()

            to_evaluate = c.idea.related.outerjoin(
                (sq, Idea.ID == sq.c.cons_id))
            to_evaluate = to_evaluate.filter(sq.c.cons_id == None)

        else:
            to_evaluate = c.idea.related

        c.paginator = paginate.Page(to_evaluate,
                                    page=int(request.params.get('page', 1)),
                                    items_per_page=10,
                                    controller='idea',
                                    action='edit',
                                    id=id)

        response.headers['Access-Control-Allow-Origin'] = '*'

        return render('idea/idea-edit.html')
Code Example #4
    def list(self, filetype='html'):
        c.nodes = Session.query(Node).all()
        
        entity_q = Session.query(Node)
        
        c.query = request.params.get('q', '')
        c.sep = request.params.get('sep', '')

        if request.params.get('sep_filter', False):
            entity_q = entity_q.filter(Entity.sep_dir != '')
        
        if c.sep:
            entity_q = entity_q.filter(Entity.sep_dir == c.sep) 

        if c.query:
            o = or_(Entity.label.like(c.query+'%'), Entity.label.like('% '+c.query+'%'))
            entity_q = entity_q.filter(o).order_by(func.length(Entity.label))
        
        if filetype=='json':
            response.content_type = 'application/json'
        response.headers['Access-Control-Allow-Origin'] = '*' 

        # limit must be applied after all filters, or SQLAlchemy raises an error
        entity_q = entity_q.limit(request.params.get('limit', None))
        c.entities = entity_q.all()
        if request.params.get('redirect', False) and len(c.entities) == 1: 
            h.redirect(h.url(controller=self._controller, action='view', 
                             filetype=filetype, id=c.entities[0].ID), 
                       code=302)
        else:
            return render('{type}/{type}-list.'.format(type=self._controller) 
                          + filetype)
Code Example #5
File: entity.py Project: nessc/inphosite
    def list(self, filetype='html'):
        entity_q = Session.query(self._type)
        #TODO: Remove the following line when Nodes are eliminated
        entity_q = entity_q.filter(Entity.typeID != 2)

        c.missing_entity = 0
        # get the list of entities
        #c.entities = entity_q.all()

        c.nodes = Session.query(Node).filter(Node.parent_id == None)
        c.nodes = c.nodes.order_by("name").all()

        c.query = request.params.get('q', '')
        c.query = c.query.strip()

        c.sep = request.params.get('sep', '')

        c.wiki = request.params.get('wiki', '')

        if request.params.get('sep_filter', False):
            entity_q = entity_q.filter(Entity.sep_dir != '')

        if c.sep:
            entity_q = entity_q.filter(Entity.sep_dir == c.sep)

        if c.wiki:
            entity_q = entity_q.filter(Entity.wiki == c.wiki)

        if c.query:
            o = or_(Entity.label.like(c.query + '%'),
                    Entity.label.like('% ' + c.query + '%'),
                    Entity.label.like('%-' + c.query + '%'))
            entity_q = entity_q.filter(o).order_by(func.length(Entity.label))

        c.total = entity_q.count()
        # limit must be the last thing applied to the query
        entity_q = entity_q.limit(request.params.get('limit', None))
        c.entities = entity_q.all()

        if filetype == 'json':
            response.content_type = 'application/json'

        if request.params.get('redirect', False) and len(c.entities) == 1:
            h.redirect(h.url(controller=self._controller,
                             action='view',
                             filetype=filetype,
                             id=c.entities[0].ID),
                       code=302)
        else:
            #if there are no results, show the related SEP results
            if not c.entities:
                c.entities = self.missing_entity_search(c.query)
                if c.entities:
                    c.missing_entity = 1
        #raise Exception
        #render the page
        return render('{type}/{type}-list.'.format(type=self._controller) +
                      filetype)
Code Example #6
File: entity.py Project: inpho/inphosite
    def list(self, filetype="html"):
        entity_q = Session.query(self._type)
        # TODO: Remove the following line when Nodes are eliminated
        entity_q = entity_q.filter(Entity.typeID != 2)

        c.missing_entity = 0
        # get the list of entities
        # c.entities = entity_q.all()

        c.nodes = Session.query(Node).filter(Node.parent_id == None)
        c.nodes = c.nodes.order_by("name").all()

        c.query = request.params.get("q", "")
        c.query = c.query.strip()

        c.sep = request.params.get("sep", "")

        c.wiki = request.params.get("wiki", "")

        if request.params.get("sep_filter", False):
            entity_q = entity_q.filter(Entity.sep_dir != "")

        if c.sep:
            entity_q = entity_q.filter(Entity.sep_dir == c.sep)

        if c.wiki:
            entity_q = entity_q.filter(Entity.wiki == c.wiki)

        if c.query:
            o = or_(
                Entity.label.like(c.query + "%"),
                Entity.label.like("% " + c.query + "%"),
                Entity.label.like("%-" + c.query + "%"),
            )
            entity_q = entity_q.filter(o).order_by(func.length(Entity.label))

        c.total = entity_q.count()
        # limit must be the last thing applied to the query
        entity_q = entity_q.limit(request.params.get("limit", None))
        c.entities = entity_q.all()

        if filetype == "json":
            response.content_type = "application/json"

        if request.params.get("redirect", False) and len(c.entities) == 1:
            h.redirect(
                h.url(controller=self._controller, action="view", filetype=filetype, id=c.entities[0].ID), code=302
            )
        else:
            # if there are no results, show the related SEP results
            if not c.entities:
                c.entities = self.missing_entity_search(c.query)
                if c.entities:
                    c.missing_entity = 1
        # raise Exception
        # render the page
        return render("{type}/{type}-list.".format(type=self._controller) + filetype)
Code Example #7
File: idea.py Project: colinallen/inphosite
 def graph_all(self, filetype='html', limit=False):
     sep_filter = request.params.get('sep_filter', False) 
     c.sep_filter = sep_filter
     idea_q = Session.query(Idea)
     c.ideas = idea_q.all()
     
     edge_q = Session.query(IdeaGraphEdge) \
         .order_by(IdeaGraphEdge.jweight.desc()) \
         .limit(3 * len(c.ideas))
     c.edges = edge_q.all()
     
     return render('idea/graph_all.' + filetype)
Code Example #8
File: idea.py Project: nessc/inphosite
    def graph_all(self, filetype='html', limit=False):
        sep_filter = request.params.get('sep_filter', False)
        c.sep_filter = sep_filter
        idea_q = Session.query(Idea)
        c.ideas = idea_q.all()

        edge_q = Session.query(IdeaGraphEdge) \
            .order_by(IdeaGraphEdge.jweight.desc()) \
            .limit(3 * len(c.ideas))
        c.edges = edge_q.all()

        return render('idea/graph_all.' + filetype)
Code Example #9
File: inpho.py Project: zhaoyang9425/topic-explorer
def _inpho_token_generator(document):
    if PUNC_TABLE.get(ord('-')):
        del PUNC_TABLE[ord('-')]
    PUNC_TABLE[ord('\n')] = ord(' ')
    
    rest = document.lower()
    rest = rehyph(rest)
    rest = strip_punc_word(rest)
    query = Session.query(Searchpattern)

    MIN_LEN = 6 
    short_patterns = Session.query(Searchpattern.searchpattern)
    short_patterns = short_patterns.filter(func.length(Searchpattern.searchpattern) < MIN_LEN)
    short_patterns = short_patterns.distinct().all()
    short_patterns = set(w[0] for w in short_patterns)

    while rest:
        if u' ' not in rest:
            yield rest
            return

        first, rest = rest.split(u' ', 1)
        rest = rest.strip()

        # always yield the raw string
        yield first

        # check if we can simply skip the short patterns
        if len(first) < MIN_LEN and first not in short_patterns:
            continue

        # search the database for keywords
        patterns = query.filter(Searchpattern.searchpattern.like(first + u' %')).all()
        
        exact_match = query.filter(Searchpattern.searchpattern==first).first()
        if exact_match is not None:
            patterns.append(exact_match)

        for p in patterns:
            # check if multi-phrase starts match in the rest of the phrase.
            if u' ' in p.searchpattern:
                first_pattern_word, longpattern = p.searchpattern.split(u' ', 1)
                if first == first_pattern_word and (rest == longpattern
                        or rest.startswith(longpattern + u' ')):
                    yield u"inpho:{}".format(p.entity.ID)
            elif first == p.searchpattern:
                yield u"inpho:{}".format(p.entity.ID)
Code Example #10
File: idea.py Project: colinallen/inphosite
 def _list_property(self, property, id, filetype='html', limit=False, sep_filter=False, type='idea'):
     c.idea = h.fetch_obj(Idea, id)
      
     limit = int(request.params.get('limit', limit))
     start = int(request.params.get('start', 0))
     sep_filter = request.params.get('sep_filter', sep_filter)
     property = getattr(c.idea, property)
     if sep_filter:
         property = property.filter(Entity.sep_dir != '')
     
     # TODO: Fix hacky workaround for the AppenderQuery vs. Relationship
     # property issue - upgrading SQLAlchemy may fix this by allowing us to
     # use len() in a smart way.
     try:
         c.total = property.count()
     except TypeError:
         c.total = len(property)

     if limit:
         property = property[start:start+limit]
     
     c.entities = property
     c.nodes = Session.query(Node).filter(Node.parent_id == None).order_by("name").all()
     return render('%s/%s-list.%s' %(type, type, filetype))
Code Example #11
def process_article(article, terms=None, entity_type=Idea, output_filename=None,
                    corpus_root='corpus/'):
    """
    Processes a single article for apriori input.
    """
    if terms is None:
        terms = select_terms(entity_type)
    

    lines = []

    filename = article_path(article)
    article_terms = Session.query(entity_type)
    article_terms = article_terms.filter(entity_type.sep_dir==article)
    article_terms = article_terms.all()
    if filename and os.path.isfile(filename):
        logging.info("processing: %s %s" % (article, filename))
        doc = extract_article_body(filename)
        lines = dm.occurrences(doc, terms, title=article,
                               remove_overlap=False,
                               format_for_file=True,
                               output_filename=output_filename)
    else:
        logging.warning("BAD SEP_DIR: %s" % article)

    return lines
Code Example #12
File: idea.py Project: nessc/inphosite
    def _get_evaluation(self,
                        id,
                        id2,
                        uid=None,
                        username=None,
                        autoCreate=True):
        idea1 = h.fetch_obj(Idea, id, new_id=True)
        idea2 = h.fetch_obj(Idea, id2, new_id=True)

        # Get user information
        if uid:
            uid = h.fetch_obj(User, uid).ID
        elif username:
            user = h.get_user(username)
            uid = user.ID if user else abort(403)
        else:
            uid = h.get_user(request.environ['REMOTE_USER']).ID

        evaluation_q = Session.query(IdeaEvaluation)
        evaluation = evaluation_q.filter_by(ante_id=id, cons_id=id2,
                                            uid=uid).first()

        # if an evaluation does not yet exist, create one
        if autoCreate and not evaluation:
            evaluation = IdeaEvaluation(id, id2, uid)
            Session.add(evaluation)

        return evaluation
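
Note: _get_evaluation is a get-or-create helper: it looks up the (ante, cons, uid) evaluation row and, when autoCreate is set, adds a fresh one to the session. The same pattern in generic form; a sketch for any SQLAlchemy model:

def get_or_create(session, model, defaults=None, **keys):
    # look up an existing row by its identifying columns
    instance = session.query(model).filter_by(**keys).first()
    if instance is None:
        # build and stage a new row; the caller decides when to commit
        instance = model(**dict(keys, **(defaults or {})))
        session.add(instance)
    return instance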
Code Example #13
def update_partial_graph(entity_type, occurrences):
    """
    Takes an entity type and a SQL filename and only updates part of the graph.
    For use with single article statistical information.
    """
    # NOTE: everything below the raise is an unfinished sketch; 'edge' is
    # never bound, so the loop body would fail if the raise were removed
    raise NotImplementedError

    # Import SQL statements
    if entity_type == Idea:
        table = "idea_graph_edges"
        type = IdeaGraphEdge
    elif entity_type == Thinker:
        table = "thinker_graph_edges"
        type = ThinkerGraphEdge
    else:
        table = "idea_thinker_graph_edges"
        type = IdeaThinkerGraphEdge

    edges = Session.query(type)
    # filter edges query to only the key term

    for ante, occurs in occurrences.iteritems():
        for cons, occurs_in in occurs.iteritems():
            # select the proper edge from result set
            # if edge does not exist, create it and add to session
            
            #update edge
            edge.occurs_in = occurs_in

    # commit changes
    Session.commit()
Code Example #14
File: idea.py Project: nessc/inphosite
    def data_integrity(self, filetype="html", redirect=False):
        if not h.auth.is_logged_in():
            abort(401)
        if not h.auth.is_admin():
            abort(403)

        idea_q = Session.query(Idea)
        c.ideas = list(idea_q)

        # Missing searchstring
        c.missing_string = [
            idea for idea in c.ideas if not getattr(idea, 'searchstring')
        ]

        # Missing searchpattern
        c.missing_pattern = [
            idea for idea in c.ideas if not getattr(idea, 'searchpattern')
        ]

        # Missing sep_dir
        c.missing_sep_dir = [
            idea for idea in c.ideas if not getattr(idea, 'sep_dir')
        ]

        # Duplicates
        c.duplicate = []
        c.sorted_ideas = sorted(c.ideas, key=lambda idea: idea.label)
        for i in range(len(c.sorted_ideas) - 1):
            if c.sorted_ideas[i].label == c.sorted_ideas[i + 1].label:
                c.duplicate.append(c.sorted_ideas[i])
                c.duplicate.append(c.sorted_ideas[i + 1])

        return render('idea/data_integrity.%s' % filetype)
Code Example #15
File: sep.py Project: we1l1n/inpho
def new_entries():
    """
    Returns a list of all entries which do not have a corresponding InPhO Entity.
    """

    # get list of all entries in database
    sep_dirs = Session.query(Entity.sep_dir).filter(Entity.sep_dir != '').all()
    sep_dirs = [row[0] for row in sep_dirs]

    # get list of all entries in the SEP database
    entries = os.path.join(config.get('corpus', 'db_path'), 'entries.txt')

    # build list of new entries
    new_sep_dirs = []
    with open(entries) as f:
        for line in f:
            sep_dir = line.split('::', 1)[0]
            try:
                if sep_dir not in sep_dirs and copy_edit(sep_dir):
                    # published entry not in database, add to list of entries
                    new_sep_dirs.append(sep_dir)
            except IOError:
                # skip IOErrors, as these indicate potential entries w/o logs
                continue

    # remove the sample entry
    try:
        new_sep_dirs.remove('sample')
    except ValueError:
        pass

    return new_sep_dirs
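
Note: new_entries assumes each line of entries.txt starts with the entry's sep_dir followed by '::'-separated metadata; only the first field is used. A hypothetical line in that format and the split applied above:

line = "epistemology::Epistemology::..."   # hypothetical entries.txt record
sep_dir = line.split('::', 1)[0]           # -> 'epistemology'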
Code Example #16
File: sep.py Project: we1l1n/inpho
def process_article(article,
                    terms=None,
                    entity_type=Idea,
                    output_filename=None,
                    corpus_root='corpus/'):
    """
    Processes a single article for apriori input.
    """
    if terms is None:
        terms = select_terms(entity_type)

    lines = []

    filename = article_path(article)
    article_terms = Session.query(entity_type)
    article_terms = article_terms.filter(entity_type.sep_dir == article)
    article_terms = article_terms.all()
    if filename and os.path.isfile(filename):
        logging.info("processing: %s %s" % (article, filename))
        doc = extract_article_body(filename)
        lines = dm.occurrences(doc,
                               terms,
                               title=article,
                               remove_overlap=False,
                               format_for_file=True,
                               output_filename=output_filename)
    else:
        logging.warning("BAD SEP_DIR: %s" % article)

    return lines
Code Example #17
def new_entries():
    """
    Returns a list of all entries which do not have a corresponding InPhO Entity.
    """

    # get list of all entries in database
    sep_dirs = Session.query(Entity.sep_dir).filter(Entity.sep_dir!='').all()
    sep_dirs = [row[0] for row in sep_dirs]

    # get list of all entries in the SEP database
    entries = os.path.join(config.get('corpus', 'db_path'), 'entries.txt')

    # build list of new entries
    new_sep_dirs = []
    with open(entries) as f:
        for line in f:
            sep_dir = line.split('::', 1)[0]
            try:
                if sep_dir not in sep_dirs and copy_edit(sep_dir):
                    # published entry not in database, add to list of entries
                    new_sep_dirs.append(sep_dir)
            except IOError:
                # skip IOErrors, as these indicate potential entries w/o logs
                continue

    # remove the sample entry
    try:
        new_sep_dirs.remove('sample')
    except ValueError:
        pass

    return new_sep_dirs
Code Example #18
File: sep.py Project: we1l1n/inpho
def update_partial_graph(entity_type, occurrences):
    """
    Takes an entity type and a SQL filename and only updates part of the graph.
    For use with single article statistical information.
    """
    # NOTE: everything below the raise is an unfinished sketch; 'edge' is
    # never bound, so the loop body would fail if the raise were removed
    raise NotImplementedError

    # Import SQL statements
    if entity_type == Idea:
        table = "idea_graph_edges"
        type = IdeaGraphEdge
    elif entity_type == Thinker:
        table = "thinker_graph_edges"
        type = ThinkerGraphEdge
    else:
        table = "idea_thinker_graph_edges"
        type = IdeaThinkerGraphEdge

    edges = Session.query(type)
    # filter edges query to only the key term

    for ante, occurs in occurrences.iteritems():
        for cons, occurs_in in occurs.iteritems():
            # select the proper edge from result set
            # if edge does not exist, create it and add to session

            #update edge
            edge.occurs_in = occurs_in

    # commit changes
    Session.commit()
Code Example #19
File: sep.py Project: we1l1n/inpho
def process_articles(entity_type=Entity,
                     output_filename='output-all.txt',
                     corpus_root='corpus/'):
    terms = select_terms(entity_type)

    Session.expunge_all()
    Session.close()

    articles = Session.query(Entity.sep_dir).filter(Entity.sep_dir != None)
    articles = articles.filter(Entity.sep_dir != '')
    articles = articles.distinct().all()
    articles = [a[0] for a in articles]

    # parallel processing of articles
    p = Pool()
    args = [(title, terms, entity_type, None, corpus_root)
            for title in articles]
    doc_lines = p.map(process_wrapper, args)
    p.close()

    #serial processing for tests
    '''
    doc_lines = []
    for title in articles:
        lines = process_article(title, terms, entity_type, None, corpus_root)
        doc_lines.append(lines)
    '''

    # write graph output to file
    print output_filename
    with open(output_filename, 'w') as f:
        for lines in doc_lines:
            f.writelines(lines)
Code Example #20
def process_articles(entity_type=Entity, output_filename='output-all.txt',
                     corpus_root='corpus/'):
    terms = select_terms(entity_type)
    
    Session.expunge_all()
    Session.close()
    
    articles = Session.query(Entity.sep_dir).filter(Entity.sep_dir!=None)
    articles = articles.filter(Entity.sep_dir!='')
    articles = articles.distinct().all()
    articles = [a[0] for a in articles]
   
    # parallel processing of articles
    p = Pool()
    args = [(title, terms, entity_type, None, corpus_root) for title in articles]
    doc_lines = p.map(process_wrapper, args)
    p.close()

    #serial processing for tests
    '''
    doc_lines = []
    for title in articles:
        lines = process_article(title, terms, entity_type, None, corpus_root)
        doc_lines.append(lines)
    '''

    # write graph output to file
    print output_filename
    with open(output_filename, 'w') as f:
        for lines in doc_lines:
            f.writelines(lines)
Code Example #21
    def missing_entity_search(self, query):
        query = quote_plus(query)
        url = 'http://plato.stanford.edu/cgi-bin/search/xmlSearcher.py?query=' + \
            query
        
        results = multi_get([url])[0][1]
        json = None
        values_dict = []
        if results:
            tree = ET.ElementTree(ET.fromstring(results))
            root = tree.getroot()
            json = []
            for element in root.getiterator('{http://a9.com/-/spec/opensearch/1.1/}Item'):
                dict = {}
                for iter in element.getiterator('{http://a9.com/-/spec/opensearch/1.1/}Location'):
                    dict['Location'] = iter.text
                json.append(dict)

            for j in range(len(json)):
                for key, value in json[j].iteritems():
                    values_dict.append(value)

        entities = Session.query(Entity).filter(Entity.sep_dir.in_(values_dict)).all()
        entities.sort(key=lambda entity: values_dict.index(entity.sep_dir))
        #raise Exception
        return entities
Code Example #22
File: sep.py Project: etboggs/inpho
def select_terms(entity_type=Idea):
    # process entities
    ideas = Session.query(entity_type)
    ideas = ideas.options(subqueryload('_spatterns'))
    # do not process Nodes or Journals
    ideas = ideas.filter(and_(Entity.typeID != 2, Entity.typeID != 4))
    return ideas.all()
Code Example #23
File: thinker.py Project: colinallen/inphosite
    def _delete_evaluation(self, evaltype, id, id2, uid=None, username=None):
        id2 = request.params.get('id2', id2)
        uid = request.params.get('uid', uid)
        username = request.params.get('username', username)

        # look for a specific user's feedback
        evaluation = self._get_evaluation(evaltype, id, id2, uid, username, 
                                          autoCreate=False)
        
        # if that feedback does not exist, unleash the nuclear option and delete
        # ALL evaluation facts for this relation, wiping it from the database.
        if h.auth.is_admin() and not evaluation:
            eval_q = Session.query(evaltype)
            eval_q = eval_q.filter_by(ante_id=id, cons_id=id2)
            evals = eval_q.all()

            # wipe them out. all of them.
            for evaluation in evals:
                h.delete_obj(evaluation)
            
            # return ok, with how many were deleted
            response.status_int = 200
            return "OK %d" % len(evals)

        elif not evaluation:
            abort(404) # simply return an error (not evaluated), if not admin

        current_uid = h.get_user(request.environ['REMOTE_USER']).ID
        if evaluation.uid != current_uid and not h.auth.is_admin():
            abort(401)

        h.delete_obj(evaluation)

        response.status_int = 200
        return "OK"
Code Example #24
File: entity.py Project: inpho/inphosite
    def missing_entity_search(self, query):
        query = quote_plus(query)
        url = "http://plato.stanford.edu/cgi-bin/search/xmlSearcher.py?query=" + query

        results = multi_get([url])[0][1]
        json = None
        values_dict = []
        if results:
            tree = ET.ElementTree(ET.fromstring(results))
            root = tree.getroot()
            json = []
            for element in root.getiterator("{http://a9.com/-/spec/opensearch/1.1/}Item"):
                dict = {}
                for iter in element.getiterator("{http://a9.com/-/spec/opensearch/1.1/}Location"):
                    dict["Location"] = iter.text
                json.append(dict)

            for j in range(len(json)):
                for key, value in json[j].iteritems():
                    values_dict.append(value)

        entities = Session.query(Entity).filter(Entity.sep_dir.in_(values_dict)).all()
        entities.sort(key=lambda entity: values_dict.index(entity.sep_dir))
        # raise Exception
        return entities
Code Example #25
File: sep.py Project: etboggs/inpho
def select_terms(entity_type=Idea):
    # process entities
    ideas = Session.query(entity_type)
    ideas = ideas.options(subqueryload('_spatterns'))
    # do not process Nodes or Journals
    ideas = ideas.filter(and_(Entity.typeID!=2, Entity.typeID!=4))
    return ideas.all()
Code Example #26
File: idea.py Project: colinallen/inphosite
    def data_integrity(self, filetype="html", redirect=False):
        if not h.auth.is_logged_in():
            abort(401)
        if not h.auth.is_admin():
            abort(403)

        idea_q = Session.query(Idea)
        c.ideas = list(idea_q)

        # Missing searchstring
        c.missing_string = [idea for idea in c.ideas
                            if not getattr(idea, 'searchstring')]
        
        # Missing searchpattern
        c.missing_pattern = [idea for idea in c.ideas
                             if not getattr(idea, 'searchpattern')]
        
        # Missing sep_dir
        c.missing_sep_dir = [idea for idea in c.ideas
                             if not getattr(idea, 'sep_dir')]
            
        # Duplicates
        c.duplicate = []
        c.sorted_ideas = sorted(c.ideas, key=lambda idea: idea.label)
        for i in range(len(c.sorted_ideas) - 1):
            if c.sorted_ideas[i].label == c.sorted_ideas[i+1].label:
                c.duplicate.append(c.sorted_ideas[i])
                c.duplicate.append(c.sorted_ideas[i+1])
                    
        return render('idea/data_integrity.%s' % filetype)
Code Example #27
File: idea.py Project: nessc/inphosite
    def _list_property(self,
                       property,
                       id,
                       filetype='html',
                       limit=False,
                       sep_filter=False,
                       type='idea'):
        c.idea = h.fetch_obj(Idea, id)

        limit = int(request.params.get('limit', limit))
        start = int(request.params.get('start', 0))
        sep_filter = request.params.get('sep_filter', sep_filter)
        property = getattr(c.idea, property)
        if sep_filter:
            property = property.filter(Entity.sep_dir != '')

        # TODO: Fix hacky workaround for the AppenderQuery vs. Relationship
        # property issue - upgrading SQLAlchemy may fix this by allowing us to
        # use len() in a smart way.
        try:
            c.total = property.count()
        except TypeError:
            c.total = len(property)

        if limit:
            property = property[start:start + limit]

        c.entities = property
        c.nodes = Session.query(Node).filter(
            Node.parent_id == None).order_by("name").all()
        return render('%s/%s-list.%s' % (type, type, filetype))
Code Example #28
File: graph.py Project: camerontt2000/inpho
 def get_subgraph(ids, thresh=None):
     edge_q = Session.query(IdeaGraphEdge)
     edge_q = edge_q.order_by(IdeaGraphEdge.jweight.desc())
     edge_q = edge_q.filter(IdeaGraphEdge.cons_id.in_(ids))
     edge_q = edge_q.filter(IdeaGraphEdge.ante_id.in_(ids))
     if thresh:
         edge_q = edge_q.filter(IdeaGraphEdge.jweight > thresh)
     return edge_q.all()
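
Note: get_subgraph restricts the edge list to edges whose endpoints both fall inside ids, strongest first, optionally dropping edges at or below a weight threshold. A hypothetical call, assuming an initialized inpho.model Session; the limit and threshold values are made up:

ids = [row[0] for row in Session.query(Idea.ID).limit(25)]
edges = get_subgraph(ids, thresh=0.1)   # only edges with jweight > 0.1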
Code Example #29
File: helpers.py Project: colinallen/inphosite
def get_user(login):
    """
    Returns the User object from the model.

    :rtype: :class:`inpho.model.User`
    """
    user = Session.query(User).filter(or_(User.email==login,
                                          User.username==login.lower())).first()
    return user
Code Example #30
File: lists.py Project: inpho/evaluations
def make_list():
    idea = Session.query(Idea).get(646)
    headings = ['Related', 'Instances', 'Hyponyms']
    termslist = zip(idea.related[:10],
                    idea.instances[:10],
                    idea.hyponyms[:10])

    template = Template(filename='lists.mako.html')
    print template.render(termslist=termslist, headings=headings)
Code Example #31
File: sep.py Project: we1l1n/inpho
def complete_mining(entity_type=Idea,
                    filename='graph.txt',
                    root='./',
                    corpus_root='corpus/',
                    update_entropy=False,
                    update_occurrences=False,
                    update_db=False):
    occur_filename = os.path.abspath(root + "occurrences.txt")
    graph_filename = os.path.abspath(root + "graph-" + filename)
    edge_filename = os.path.abspath(root + "edge-" + filename)
    sql_filename = os.path.abspath(root + "sql-" + filename)

    doc_terms = doc_terms_list()

    if update_occurrences:
        print "processing articles..."
        process_articles(entity_type, occur_filename, corpus_root=corpus_root)

    print "filtering occurrences..."
    filter_apriori_input(occur_filename, graph_filename, entity_type,
                         doc_terms)

    print "running apriori miner..."
    dm.apriori(graph_filename, edge_filename)

    print "processing edges..."
    edges = dm.process_edges(graph_filename, edge_filename, occur_filename,
                             doc_terms)
    ents = dm.calculate_node_entropy(edges)
    edges = dm.calculate_edge_weight(edges, ents)

    print "creating sql files..."

    with open(sql_filename, 'w') as f:
        for edge, props in edges.iteritems():
            ante, cons = edge
            row = "%s::%s" % edge
            row += ("::%(confidence)s::%(jweight)s::%(weight)s"
                    "::%(occurs_in)s\n" % props)
            f.write(row)

    if update_entropy:
        print "updating term entropy..."

        for term_id, entropy in ents.iteritems():
            term = Session.query(Idea).get(term_id)
            if term:
                term.entropy = entropy

        Session.flush()
        Session.commit()
        Session.close()

    if update_db:
        print "updating the database..."
        update_graph(entity_type, sql_filename)
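
Note: complete_mining chains the full pipeline: occurrence extraction, apriori mining, entropy and edge-weight calculation, SQL file generation, and optional database updates. A hypothetical invocation; the root directory is made up:

complete_mining(entity_type=Idea,
                filename='graph.txt',
                root='/tmp/mining/',      # hypothetical output directory
                update_occurrences=True,  # regenerate occurrences.txt first
                update_entropy=True,      # write entropy back to Idea rows
                update_db=False)          # skip the final graph table update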
Code Example #32
File: journal.py Project: colinallen/inphosite
    def data_integrity(self, filetype='html', redirect=False):
        if not h.auth.is_logged_in():
            abort(401)
        if not h.auth.is_admin():
            abort(403)

        journal_q = Session.query(Journal)
        
        # check for query
        if request.params.get('q'):
            journal_q = journal_q.filter(Journal.name.like(u'%'+request.params['q']+'%'))
        
        # get the list of journals
        c.journals = list(journal_q)

        c.missing_issn = []
        c.bad_issn = []
        for journal in c.journals:
            # Missing ISSN
            if not getattr(journal, 'ISSN') or journal.ISSN == '':
                c.missing_issn.append(journal)
            # Journal has bad ISSN format (xxxx-xxxx is good format)
            elif not re.match(r'[0-9]{4}-[0-9]{3}[0-9X]', journal.ISSN):
                c.bad_issn.append(journal)

        # Duplicates
        # This check is set up for pairs: if there are more than two copies of
        # the same journal, it will appear in the list multiple times.
        c.duplicate = []
        c.sorted_journals = sorted(c.journals, key=lambda journal: journal.label)
        for i in range(len(c.sorted_journals) - 1):
            if c.sorted_journals[i].label == c.sorted_journals[i+1].label:
                c.duplicate.append(c.sorted_journals[i])
                c.duplicate.append(c.sorted_journals[i+1]) 

        # re-get the list of journals (only ones accessed in last 4 weeks)
        # Magic constant of 2419200 corresponds to 4 weeks in seconds
        c.journals = list(journal_q.filter(Journal.last_accessed < (time.time() - 2419200)))

        # filter out results into different chunks
        # Valid URL, not found
        c.broken = [journal for journal in c.journals if journal.URL]
        
        # Journal is active, no URL set
        c.missing = [journal for journal in c.journals 
                     if journal.URL is None and journal.active]
        
        # Journal is active, URL is set to blank
        c.blank = [journal for journal in c.journals 
                   if journal.URL == '' and journal.active]
        
        # Journal is inactive and missing URL
        c.inactive = [journal for journal in c.journals 
                      if journal.URL is None and not journal.active]
        
        return render('journal/data_integrity.' + filetype)
Code Example #33
def get_user(login):
    """
    Returns the User object from the model.

    :rtype: :class:`inpho.model.User`
    """
    if isinstance(login, str) or isinstance(login, unicode):
        user = Session.query(User).filter(
            or_(User.email == login, User.username == login.lower())).first()
        return user
    else:
        raise Exception(login)
Code Example #34
File: auth.py Project: inpho/inphosite
def get_user(login):
    """
    Returns the User object from the model.

    :rtype: :class:`inpho.model.User`
    """
    if isinstance(login,str) or isinstance(login,unicode):
        user = Session.query(User).filter(or_(User.email==login,
                                              User.username==login.lower())).first()
        return user
    else:
        raise Exception(login)
Code Example #35
File: account.py Project: colinallen/inphosite
    def review(self):
        if not request.environ.get('REMOTE_USER', False):
            abort(401)
        
        c.user = h.get_user(request.environ['REMOTE_USER'])        

        ieq = Session.query(IdeaEvaluation).order_by(IdeaEvaluation.time.desc())
        c.evaluations = ieq.filter(and_(IdeaEvaluation.uid==c.user.ID,
                                   or_(IdeaEvaluation.generality>-1,
                                       IdeaEvaluation.relatedness>-1))).all()
        
        return render('account/review.html')
Code Example #36
File: sep.py Project: we1l1n/inpho
def doc_terms_list():
    articles = Session.query(Entity)
    articles = articles.filter(Entity.sep_dir != None)
    articles = articles.filter(Entity.sep_dir != '')
    articles = articles.all()

    doc_terms = defaultdict(list)

    for entity in articles:
        doc_terms[entity.sep_dir].append(entity)

    return doc_terms
Code Example #37
def doc_terms_list():
    articles = Session.query(Entity)
    articles = articles.filter(Entity.sep_dir!=None)
    articles = articles.filter(Entity.sep_dir!='')
    articles = articles.all()
   
    doc_terms = defaultdict(list)

    for entity in articles:
        doc_terms[entity.sep_dir].append(entity)
    
    return doc_terms
Code Example #38
def complete_mining(entity_type=Idea, filename='graph.txt', root='./',
                    corpus_root='corpus/', update_entropy=False,
                    update_occurrences=False, update_db=False): 
    occur_filename = os.path.abspath(root + "occurrences.txt")
    graph_filename = os.path.abspath(root + "graph-" + filename)
    edge_filename = os.path.abspath(root + "edge-" + filename)
    sql_filename = os.path.abspath(root + "sql-" + filename)

    doc_terms = doc_terms_list()

    if update_occurrences:
        print "processing articles..."
        process_articles(entity_type, occur_filename, corpus_root=corpus_root)

    print "filtering occurrences..."
    filter_apriori_input(
        occur_filename, graph_filename, entity_type, doc_terms)

    print "running apriori miner..."
    dm.apriori(graph_filename, edge_filename)
    
    print "processing edges..."
    edges = dm.process_edges(
        graph_filename, edge_filename, occur_filename, doc_terms)
    ents = dm.calculate_node_entropy(edges)
    edges = dm.calculate_edge_weight(edges, ents)
    
    print "creating sql files..."

    with open(sql_filename, 'w') as f:
        for edge, props in edges.iteritems():
            ante,cons = edge
            row = "%s::%s" % edge
            row += ("::%(confidence)s::%(jweight)s::%(weight)s"
                    "::%(occurs_in)s\n" % props)
            f.write(row)

    if update_entropy:
        print "updating term entropy..."

        for term_id, entropy in ents.iteritems():
            term = Session.query(Idea).get(term_id)
            if term:
                term.entropy = entropy

        Session.flush()
        Session.commit()
        Session.close()

    if update_db:
        print "updating the database..."
        update_graph(entity_type, sql_filename)
Code Example #39
File: idea.py Project: colinallen/inphosite
    def _get_anon_evaluation(self, id, id2, ip, autoCreate=True):
        idea1 = h.fetch_obj(Idea, id, new_id=True)
        idea2 = h.fetch_obj(Idea, id2, new_id=True)

        evaluation_q = Session.query(AnonIdeaEvaluation)
        evaluation = evaluation_q.filter_by(ante_id=id, cons_id=id2, ip=ip).first()

        # if an evaluation does not yet exist, create one
        if autoCreate and not evaluation:
            evaluation = AnonIdeaEvaluation(id, id2,ip)
            Session.add(evaluation)

        return evaluation
Code Example #40
File: entity.py Project: inpho/inphosite
    def related_entries(self, id, filetype="html"):
        c.entity = h.fetch_obj(Entity, id)

        related = sep.get_related()
        related = related[c.entity.sep_dir]

        c.entities = []
        for sep_dir in related:
            entity = Session.query(Entity).filter(Entity.sep_dir == sep_dir).first()
            if entity is not None:
                c.entities.append(entity)

        return render("entity/entity-list.%s" % (filetype))
Code Example #41
File: idea.py Project: colinallen/inphosite
    def evaluation(self, id, id2):
        c.entity = h.fetch_obj(Idea, id)
        c.entity2 = h.fetch_obj(Entity, id2)
        if isinstance(c.entity2, Node):
            c.entity2 = c.entity2.idea
            id2 = c.entity2.ID
        if not isinstance(c.entity2, Idea):
            # no evaluation implemented
            response.status_int = 501

            return ''

        c.edit = True
        c.alert = request.params.get('alert', True)
       
        # retrieve evaluation for pair
        c.generality = int(request.params.get('generality', -1))
        c.relatedness = int(request.params.get('relatedness', -1))
        
        # retrieve user information
        identity = request.environ.get('repoze.who.identity')
        c.uid = None if not identity else identity['user'].ID
        
        #TODO: Place cookie auth here
        try:
            cookie = request.params.get('cookieAuth', 'null')
            username = h.auth.get_username_from_cookie(cookie) or ''
            user = h.get_user(username)
            if user is not None:
                c.uid = user.ID

        except ValueError:
            # invalid IP, abort
            abort(403)

        # use the user's evaluation if present, otherwise a null eval
        if c.uid and (c.generality == -1 or c.relatedness == -1):
            eval_q = Session.query(IdeaEvaluation.generality,
                                   IdeaEvaluation.relatedness)
            eval_q = eval_q.filter_by(uid=c.uid, ante_id=id, cons_id=id2)

            c.generality, c.relatedness = eval_q.first() or\
                (int(request.params.get('generality', -1)), 
                 int(request.params.get('relatedness', -1)))


        if c.relatedness != -1:
            c.edit = request.params.get('edit', False)

        return render('idea/eval.html')
Code Example #42
File: idea.py Project: nessc/inphosite
    def _get_anon_evaluation(self, id, id2, ip, autoCreate=True):
        idea1 = h.fetch_obj(Idea, id, new_id=True)
        idea2 = h.fetch_obj(Idea, id2, new_id=True)

        evaluation_q = Session.query(AnonIdeaEvaluation)
        evaluation = evaluation_q.filter_by(ante_id=id, cons_id=id2,
                                            ip=ip).first()

        # if an evaluation does not yet exist, create one
        if autoCreate and not evaluation:
            evaluation = AnonIdeaEvaluation(id, id2, ip)
            Session.add(evaluation)

        return evaluation
Code Example #43
    def related_entries(self, id, filetype='html'):
        c.entity = h.fetch_obj(Entity,id)
        
        related = sep.get_related()
        related = related[c.entity.sep_dir]
       
        c.entities = [] 
        for sep_dir in related:
            entity = Session.query(Entity).filter(Entity.sep_dir==sep_dir).first()
            if entity is not None:
                c.entities.append(entity)

        return render('entity/entity-list.%s' % filetype)
Code Example #44
File: idea.py Project: nessc/inphosite
    def evaluation(self, id, id2):
        c.entity = h.fetch_obj(Idea, id)
        c.entity2 = h.fetch_obj(Entity, id2)
        if isinstance(c.entity2, Node):
            c.entity2 = c.entity2.idea
            id2 = c.entity2.ID
        if not isinstance(c.entity2, Idea):
            # no evaluation implemented
            response.status_int = 501

            return ''

        c.edit = True
        c.alert = request.params.get('alert', True)

        # retrieve evaluation for pair
        c.generality = int(request.params.get('generality', -1))
        c.relatedness = int(request.params.get('relatedness', -1))

        # retrieve user information
        identity = request.environ.get('repoze.who.identity')
        c.uid = None if not identity else identity['user'].ID

        #TODO: Place cookie auth here
        try:
            cookie = request.params.get('cookieAuth', 'null')
            username = h.auth.get_username_from_cookie(cookie) or ''
            user = h.get_user(username)
            if user is not None:
                c.uid = user.ID

        except ValueError:
            # invalid IP, abort
            abort(403)

        # use the user's evaluation if present, otherwise a null eval
        if c.uid and (c.generality == -1 or c.relatedness == -1):
            eval_q = Session.query(IdeaEvaluation.generality,
                                   IdeaEvaluation.relatedness)
            eval_q = eval_q.filter_by(uid=c.uid, ante_id=id, cons_id=id2)

            c.generality, c.relatedness = eval_q.first() or\
                (int(request.params.get('generality', -1)),
                 int(request.params.get('relatedness', -1)))

        if c.relatedness != -1:
            c.edit = request.params.get('edit', False)

        return render('idea/eval.html')
Code Example #45
    def review(self):
        if not request.environ.get('REMOTE_USER', False):
            abort(401)

        c.user = h.get_user(request.environ['REMOTE_USER'])

        ieq = Session.query(IdeaEvaluation).order_by(
            IdeaEvaluation.time.desc())
        c.evaluations = ieq.filter(
            and_(
                IdeaEvaluation.uid == c.user.ID,
                or_(IdeaEvaluation.generality > -1,
                    IdeaEvaluation.relatedness > -1))).all()

        return render('account/review.html')
Code Example #46
File: taxonomy.py Project: inpho/inpho
def from_dlv(filename, load_obj=False):
    """
    Function to build a taxonomy from the specified DLV output file.
    """
    # build regex for instance and link search
    regex_class = re.compile(r"class\(i(\d+)\)")
    # (?:ins|isa) groups the alternation; the original [ins|isa] was a
    # character class and matched single characters, not predicate names
    regex_ins = re.compile(r"(?:ins|isa)\(i(\d+),i(\d+)\)")
    regex_links = re.compile(r"link\(i(\d+),i(\d+)\)")

    # process DLV output file
    with open(filename) as f:
        dlv = f.read()

        classes = frozenset(regex_class.findall(dlv))
        instances = frozenset(regex_ins.findall(dlv))
        links = frozenset(regex_links.findall(dlv))

    # set up taxonomy structure
    nodes = defaultdict(Node)
    root = Node("Philosophy", spine=True)

    # populate instances
    for child, parent in instances:
        nodes[parent].graft(nodes[child])

    # populate links
    for target, source in links:
        nodes[source].links.add(nodes[target])

    # glue taxonomies together, initialize values
    for key,node in nodes.iteritems():
        # load the database objects 
        if load_obj:
            node.value = Session.query(Entity).get(key)
        else:
            node.value = key

        # specify hand-built portion of the taxonomy
        if node.value in classes:
            node.spine = True

        # if this is a root, glue it to the Philosophy node.
        if node.parent is None:
            root.graft(node)

    return root
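
Note: the three regexes pull class, instance, and link facts out of DLV's answer-set text. A self-contained sketch on a made-up output fragment:

import re

dlv = "class(i1), ins(i2,i1), isa(i3,i2), link(i3,i1)"    # hypothetical output
print(re.findall(r"class\(i(\d+)\)", dlv))                # ['1']
print(re.findall(r"(?:ins|isa)\(i(\d+),i(\d+)\)", dlv))   # [('2', '1'), ('3', '2')]
print(re.findall(r"link\(i(\d+),i(\d+)\)", dlv))          # [('3', '1')]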
Code Example #47
def from_dlv(filename, load_obj=False):
    """
    Function to build a taxonomy from the specified DLV output file.
    """
    # build regex for instance and link search
    regex_class = re.compile(r"class\(i(\d+)\)")
    # (?:ins|isa) groups the alternation; the original [ins|isa] was a
    # character class and matched single characters, not predicate names
    regex_ins = re.compile(r"(?:ins|isa)\(i(\d+),i(\d+)\)")
    regex_links = re.compile(r"link\(i(\d+),i(\d+)\)")

    # process DLV output file
    with open(filename) as f:
        dlv = f.read()

        classes = frozenset(regex_class.findall(dlv))
        instances = frozenset(regex_ins.findall(dlv))
        links = frozenset(regex_links.findall(dlv))

    # set up taxonomy structure
    nodes = defaultdict(Node)
    root = Node("Philosophy", spine=True)

    # populate instances
    for child, parent in instances:
        nodes[parent].graft(nodes[child])

    # populate links
    for target, source in links:
        nodes[source].links.add(nodes[target])

    # glue taxonomies together, initialize values
    for key, node in nodes.iteritems():
        # load the database objects
        if load_obj:
            node.value = Session.query(Entity).get(key)
        else:
            node.value = key

        # specify hand-built portion of the taxonomy
        if node.value in classes:
            node.spine = True

        # if this is a root, glue it to the Philosophy node.
        if node.parent is None:
            root.graft(node)

    return root
Code Example #48
File: sep.py Project: camerontt2000/inpho
def process_articles(entity_type=Entity, output_filename='output-all.txt',
                     corpus_root='corpus/'):
    terms = select_terms(entity_type)
    Session.expunge_all()
    Session.close()

    # fix search patterns
    for term in terms:
        newpatterns = []
        for pattern in term.searchpatterns:
            if '(' in pattern and ')' in pattern:
                pattern = pattern.replace('( ', '(\\b')
                pattern = pattern.replace(' )', '\\b)')
            else:
                pattern = '\\b%s\\b' % pattern.strip()

            newpatterns.append(pattern)

        term.searchpatterns = newpatterns

    
    articles = Session.query(Entity.sep_dir).filter(Entity.sep_dir!=None)
    articles = articles.filter(Entity.sep_dir!='')
    articles = articles.distinct().all()
    articles = [a[0] for a in articles]
   
    # parallel processing of articles
    p = Pool()
    args = [(title, terms, entity_type, None, corpus_root) for title in articles]
    doc_lines = p.map(process_wrapper, args)
    p.close()

    #serial processing for tests
    '''
    doc_lines = []
    for title in articles:
        lines = process_article(title, terms, entity_type, None, corpus_root)
        doc_lines.append(lines)
    '''

    # write graph output to file
    print output_filename
    with open(output_filename, 'w') as f:
        for lines in doc_lines:
            f.writelines(lines)
Code Example #49
    def load_check(self, filetype="html", redirect=False):
        if not h.auth.is_logged_in():
            abort(401)
        if not h.auth.is_admin():
            abort(403)

        entity_q = Session.query(Entity)
        c.entities = list(entity_q)

        c.load_error = []

        for entity in c.entities:
            try:
                urlopen(h.url('https://www.inphoproject.org', getattr(entity, 'url')))
            except Exception as e:
                c.load_error.append(entity)

        return render('entity/load_check.' + filetype)
Code Example #50
File: sep.py Project: etboggs/inpho
def process_articles(entity_type=Entity, output_filename='output-all.txt',
                     corpus_root='corpus/'):
    terms = select_terms(entity_type)
    Session.expunge_all()
    Session.close()
    
    articles = Session.query(entity_type).filter(entity_type.sep_dir!='').all()
   
    # parallel processing of articles
    p = Pool()
    args = [(title, terms, entity_type, None, corpus_root) for title in articles]
    doc_lines = p.map(process_wrapper, args)
    p.close()

    # write graph output to file
    with open(output_filename, 'w') as f:
        for lines in doc_lines:
            f.writelines(lines)
Code Example #51
File: entity.py Project: inpho/inphosite
    def load_check(self, filetype="html", redirect=False):
        if not h.auth.is_logged_in():
            abort(401)
        if not h.auth.is_admin():
            abort(403)

        entity_q = Session.query(Entity)
        c.entities = list(entity_q)

        c.load_error = []

        for entity in c.entities:
            try:
                urlopen(h.url('https://www.inphoproject.org', getattr(entity, 'url')))
            except Exception as e:
                c.load_error.append(entity)

        return render('entity/load_check.' + filetype)
Code Example #52
def fuzzymatch_all(string1):
    """
    Takes a string and returns all potential fuzzymatches from the Entity
    database. Matches are returned as a list of (entity,confidence) tuples.
    """
    # construct Entity query
    entities = Session.query(Entity)
    entities = entities.filter(Entity.typeID != 2)  # exclude nodes
    entities = entities.filter(Entity.typeID != 4)  # exclude journals

    # initialize result object
    matches = []

    # build results
    for entity in entities:
        confidence, distance = fuzzymatch(string1, entity.label)
        if confidence >= 0.5:
            matches.append((entity, confidence))

    return matches
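
Note: fuzzymatch_all is a full scan of the non-node, non-journal entities, keeping anything that scores at least 0.5. A hypothetical usage that sorts best matches first; the query string is made up:

matches = fuzzymatch_all("epistemologie")   # hypothetical misspelled query
matches.sort(key=lambda pair: pair[1], reverse=True)
for entity, confidence in matches[:5]:
    print("%0.2f %s" % (confidence, entity.label))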
Code Example #53
    def data_integrity(self, filetype="html", redirect=False):
        if not h.auth.is_logged_in():
            abort(401)
        if not h.auth.is_admin():
            abort(403)

        entity_q = Session.query(Entity)
        c.entities = list(entity_q)

        c.missing_sep_dir = []
        c.mult_sep_dir = []

        for entity in c.entities:
            if not getattr(entity, 'sep_dir'):
                c.missing_sep_dir.append(entity)
            else:
                for comp_entity in c.entities:
                    if (getattr(entity, 'sep_dir') == getattr(comp_entity, 'sep_dir')
                            and entity != comp_entity):
                        c.mult_sep_dir.append(getattr(entity, 'sep_dir'))

        return render('entity/data_integrity.' + filetype)
Code Example #54
File: sep.py Project: etboggs/inpho
def process_articles(entity_type=Entity,
                     output_filename='output-all.txt',
                     corpus_root='corpus/'):
    terms = select_terms(entity_type)
    Session.expunge_all()
    Session.close()

    articles = Session.query(entity_type).filter(
        entity_type.sep_dir != '').all()

    # parallel processing of articles
    p = Pool()
    args = [(title, terms, entity_type, None, corpus_root)
            for title in articles]
    doc_lines = p.map(process_wrapper, args)
    p.close()

    # write graph output to file
    with open(output_filename, 'w') as f:
        for lines in doc_lines:
            f.writelines(lines)
Code Example #55
    def _delete_evaluation(self, evaltype, id, id2, uid=None, username=None):
        id2 = request.params.get('id2', id2)
        uid = request.params.get('uid', uid)
        username = request.params.get('username', username)

        # look for a specific user's feedback
        evaluation = self._get_evaluation(evaltype,
                                          id,
                                          id2,
                                          uid,
                                          username,
                                          autoCreate=False)

        # if that feedback does not exist, unleash the nuclear option and delete
        # ALL evaluation facts for this relation, wiping it from the database.
        if h.auth.is_admin() and not evaluation:
            eval_q = Session.query(evaltype)
            eval_q = eval_q.filter_by(ante_id=id, cons_id=id2)
            evals = eval_q.all()

            # wipe them out. all of them.
            for evaluation in evals:
                h.delete_obj(evaluation)

            # return ok, with how many were deleted
            response.status_int = 200
            return "OK %d" % len(evals)

        elif not evaluation:
            abort(404)  # simply return an error (not evaluated), if not admin

        current_uid = h.get_user(request.environ['REMOTE_USER']).ID
        if evaluation.uid != current_uid and not h.auth.is_admin():
            abort(401)

        h.delete_obj(evaluation)

        response.status_int = 200
        return "OK"
Code Example #56
def fetch_obj(type, id, error=404, new_id=False):
    """
    Fetches the object with the given id from the collection of type type. If
    the object does not exist, throw an HTTP error (default: 404 Not Found).

    :param type: object type
    :type type: class in :mod:`inpho.model`
    :param id: object id
    :type id: integer or None
    :param error: HTTP error code.
    :rtype: *type*
    """
    if id is None:
        abort(error)
    obj_q = Session.query(type)
    obj = obj_q.get(int(id))
    #else:
    #    obj = obj_q.filter(type.ID==int(id)).first()

    if obj is None:
        abort(error)
    return obj
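
Note: fetch_obj centralizes the "load by primary key or abort" idiom used throughout these controllers. Hypothetical calls mirroring the evaluate actions above:

idea = fetch_obj(Idea, 646)               # abort(404) if no Idea with ID 646
user = fetch_obj(User, uid, error=403)    # abort with 403 instead of 404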
Code Example #57
    def data_integrity(self, filetype='html', redirect=False):
        if not h.auth.is_logged_in():
            abort(401)
        if not h.auth.is_admin():
            abort(403)

        school_q = Session.query(SchoolOfThought)
        c.schools = list(school_q)

        # Missing sep_dir
        c.missing_sep_dir = [
            school for school in c.schools if not getattr(school, "sep_dir")
        ]

        # Duplicates
        c.duplicate = []
        c.sorted_schools = sorted(c.schools, key=lambda school: school.label)
        for i in range(len(c.sorted_schools) - 1):
            if c.sorted_schools[i].label == c.sorted_schools[i + 1].label:
                c.duplicate.append(c.sorted_schools[i])
                c.duplicate.append(c.sorted_schools[i + 1])

        return render('school_of_thought/data_integrity.%s' % filetype)