Example #1
 def tablize(self, data):
     """
     Convert a list of data into a table.
     """
     # First try a relatively fast 'tablize' that doesn't deal with nested lists,
     # falling back to the 'old' code if the data is not a list of (nested) dicts.
     # TODO: [WvA] do we actually need the old codepath, i.e. do we ever have nested lists?
     try:
         return self.fast_tablize(data)
     except ValueError:
         pass
     # First, flatten the data (i.e., convert it to a list of
     # dictionaries that are each exactly one level deep).  The key for
     # each item designates the name of the column that the item will
     # fall into.
     data = self.flatten_data(data)
     #import json; print(json.dumps(data, indent=2))
     # Collect all unique headers (in insertion order) and the value types seen under each.
     headers = OrderedDict()
     for item in data:
         for k, v in item.items():
             if k not in headers:
                 headers[k] = set()
             headers[k].add(type(v))
     table = table3.ObjectTable(rows=data)
     for header in headers:
         fieldtype = headers[header]
         if len(fieldtype) == 1:
             fieldtype = list(fieldtype)[0]
         else:
             fieldtype = None
         fieldtype = {bool: str, type(None): str}.get(fieldtype, fieldtype)
         table.add_column(label=header,
                          col=partial(lambda key, item: item.get(key, None),
                                      header),
                          fieldtype=fieldtype)
     return table
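
A minimal usage sketch (not from the source itself): `r` stands for an assumed instance of the surrounding class, and the data is illustrative. The nested list under "tags" makes fast_tablize raise ValueError, so tablize falls back to the flatten_data path.

    # Hypothetical data; `r` is an assumed instance of the class above.
    data = [
        {"id": 1, "user": {"name": "alice"}},
        {"id": 2, "user": {"name": "bob"}, "tags": ["x", "y"]},  # nested list
    ]
    table = r.tablize(data)  # fast path fails -> flatten_data fallback
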
Example #2
    def get_table(self, codingjobs, export_level, **kargs):
        codingjobs = CodingJob.objects.prefetch_related("coded_articles__codings__values").filter(pk__in=codingjobs)

        # Get all rows of the table
        self.progress_monitor.update(5, "Preparing Jobs")
        rows = list(_get_rows(
            codingjobs, include_sentences=(int(export_level) != CODING_LEVEL_ARTICLE),
            include_multiple=True, include_uncoded_articles=False,
            progress_monitor=self.progress_monitor
            ))

        table = table3.ObjectTable(rows=rows)
        self.progress_monitor.update(5, "Preparing columns")

        # Meta field columns
        for field in _METAFIELDS:
            if self.options.get("meta_{field.object}_{field.attr}".format(**locals())):
                if field.object == "subsentence":
                    table.addColumn(SubSentenceColumn(field))
                else:
                    table.addColumn(MetaColumn(field))

        # Build columns based on form schemafields
        for schemafield in self.bound_form.schemafields:
            prefix = _get_field_prefix(schemafield)
            if self.options[prefix + "_included"]:
                options = {k[len(prefix) + 1:]: v for (k, v) in self.options.iteritems() if k.startswith(prefix)}

                for label, function in schemafield.serialiser.get_export_columns(**options):
                    table.addColumn(CodingColumn(schemafield, label, function))
        return table
Example #3
    def get_table(self,
                  assocTable,
                  rowheader_label="interval",
                  rowheader_type=str,
                  cell_type=float):

        intervals = sorted({i for (i, q, q2, p) in assocTable})
        assocs = {(x, y) for (i, x, y, s) in assocTable}
        cols = {u"{x}\u2192{y}".format(x=x, y=y): (x, y) for (x, y) in assocs}
        scores = {(i, x, y): s for (i, x, y, s) in assocTable}

        colnames = sorted(cols)

        result = table3.ObjectTable(rows=intervals)
        result.addColumn(
            table3.ObjectColumn(label=rowheader_label,
                                cellfunc=lambda row: row,
                                fieldtype=rowheader_type))

        for col, (x, y) in sorted(cols.iteritems()):
            result.addColumn(
                table3.ObjectColumn(label=unicode(col),
                                    fieldtype=cell_type,
                                    cellfunc=partial(getscore, scores, x, y)))

        return result
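
For orientation (an illustrative sketch, not part of the source): assocTable is expected to be an iterable of (interval, x, y, score) 4-tuples, which get_table pivots into one row per interval and one "x→y" column per (x, y) pair.

    # Hypothetical input; intervals, terms and scores are made up.
    assocTable = [
        ("2012-01", "bank", "crisis", 0.8),
        ("2012-01", "bank", "bonus", 0.3),
        ("2012-02", "bank", "crisis", 0.5),
    ]
    # Rows: "2012-01", "2012-02"; columns: "interval" plus one
    # u"x\u2192y" column per pair, filled by getscore(scores, x, y, row).
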
Example #4
    def tablize(self, data):
        """
        Convert a list of data into a table.
        """
        # First, flatten the data (i.e., convert it to a list of
        # dictionaries that are each exactly one level deep).  The key for
        # each item designates the name of the column that the item will
        # fall into.
        data = self.flatten_data(data)

        # Collect all unique headers (in insertion order) and the value types seen under each.
        headers = OrderedDict()
        for item in data:
            for k, v in item.iteritems():
                if k not in headers:
                    headers[k] = set()
                headers[k].add(type(v))

        table = table3.ObjectTable(rows=data)
        for header in headers:
            fieldtype = headers[header]
            if len(fieldtype) == 1:
                fieldtype = list(fieldtype)[0]
            else:
                fieldtype = None
            fieldtype = {bool: str, type(None): str}.get(fieldtype, fieldtype)
            table.addColumn(label=header,
                            col=partial(lambda key, item: item.get(key, None),
                                        header),
                            fieldtype=fieldtype)

        return table
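
flatten_data itself does not appear in these examples; the following is a minimal sketch of the contract the code above relies on (the name flatten_item and the "." separator are assumptions; the real implementation may differ):

    def flatten_item(item, prefix=""):
        # Sketch: collapse nested dicts into one level so keys can serve as
        # column names, e.g. {"a": 1, "b": {"c": 2}} -> {"a": 1, "b.c": 2}.
        flat = {}
        for key, value in item.items():
            if isinstance(value, dict):
                flat.update(flatten_item(value, prefix + key + "."))
            else:
                flat[prefix + key] = value
        return flat
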
Example #5
def parent_table(codebook):
    result = table3.ObjectTable(rows=codebook.codebookcodes)
    result.addColumn(lambda row: row.code.uuid, label="uuid")
    result.addColumn(lambda row: row.code.id, label="code_id")
    result.addColumn(lambda row: row.code, label="code")
    result.addColumn(lambda row: row.parent, label="parent")
    return result
Example #6
 def parent_table(self, codebook, labelcols):
     result = table3.ObjectTable(rows=codebook.codebookcodes)
     result.add_column(lambda row: str(row.code.uuid), label="uuid")
     result.add_column(lambda row: row.code.id, label="code_id")
     result.add_column(lambda row: getattr(row.parent, "id", None),
                       label="parent_id")
     result.add_column(lambda row: row.code.label, label="label")
     self.add_label_columns(result)
     return result
Example #7
    def get_table(self, codingjobs, export_level, include_uncoded_sentences=False,
                  include_uncoded_articles=False, **kargs):
        codingjobs = CodingJob.objects.prefetch_related("coded_articles__codings__values").filter(pk__in=codingjobs)

        # Get all rows of the table
        self.progress_monitor.update(5, "Preparing Jobs")
        rows = list(_get_rows(
            codingjobs, include_sentences=(int(export_level) != CODING_LEVEL_ARTICLE),
            include_multiple=True, include_uncoded_articles=include_uncoded_articles,
            include_uncoded_sentences=include_uncoded_sentences,
            progress_monitor=self.progress_monitor
        ))

        table = table3.ObjectTable(rows=rows)
        self.progress_monitor.update(5, "Preparing columns")

        # Meta field columns
        for field in _METAFIELDS:
            if self.options.get("meta_{field.object}_{field.attr}".format(**locals())):
                if field.object == "subsentence":
                    table.addColumn(SubSentenceColumn(field))
                elif field.attr == "date":
                    table.addColumn(DateColumn(field.label, kargs["date_format"]))
                else:
                    table.addColumn(MetaColumn(field))

        # Date format columns (these also belong to the meta fields)
        for id, label, strftime in DATE_FORMATS:
            if self.options.get("meta_{id}".format(id=id)):
                table.addColumn(DateColumn(label, strftime))

        for field_name in AGGREGATABLE_FIELDS:
            codebook = self.options.get("aggregation_{field_name}".format(field_name=field_name))
            language = self.options.get("aggregation_{field_name}_language".format(field_name=field_name))
            not_found = self.options.get("aggregation_{field_name}_default".format(field_name=field_name))

            if not codebook:
                continue

            codebook.cache_labels(language)
            table.addColumn(MappingMetaColumn(
                _MetaField("article", field_name, field_name + " aggregation"),
                codebook.get_aggregation_mapping(language), not_found
            ))

        # Build columns based on form schemafields
        for schemafield in self.bound_form.schemafields:
            prefix = _get_field_prefix(schemafield)
            if self.options[prefix + "_included"]:
                options = {k[len(prefix) + 1:]: v for (k, v) in self.options.iteritems() if k.startswith(prefix)}

                for label, function in schemafield.serialiser.get_export_columns(**options):
                    table.addColumn(CodingColumn(schemafield, label, function))
        return table
Example #8
 def values_table(self, unit_codings=False):
     """
     Return the coded values in this job as a table3.Table with codings as rows
     and the fields in the columns; cells contain serialised values. 
     """
     schema_id = self.unitschema_id if unit_codings else self.articleschema_id
     fields = CodingSchemaField.objects.filter(codingschema=schema_id)
     columns = [SchemaFieldColumn(field) for field in fields]
     codings = Coding.objects.filter(codingjob=self, sentence__isnull=(not unit_codings))
     codings = codings.prefetch_related("values", "values__field")
     codings = list(codings)
     return table3.ObjectTable(rows=codings, columns=columns)
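
A sketch of consuming the returned table via the table3 accessors seen in the other examples (getRows, getColumns, getValue); `job` is a hypothetical CodingJob instance:

    t = job.values_table(unit_codings=True)
    for coding in t.getRows():             # one row per Coding
        for column in t.getColumns():      # one SchemaFieldColumn per field
            print t.getValue(coding, column),  # serialised cell value
        print
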
Example #9
def tree_table(codebook):
    rows = list(_get_tree(codebook))

    result = table3.ObjectTable(rows=rows)
    result.addColumn(lambda row: row.code.uuid, label="uuid")
    result.addColumn(lambda row: row.code.id, label="code_id")

    depth = max(row.indent for row in rows) + 1
    for i in range(depth):
        result.addColumn(TreeCodeColumn(i))

    return result
Example #10
    def tree_table(self, codebook, language, labelcols):
        rows = list(_get_tree(codebook))

        result = table3.ObjectTable(rows=rows)
        result.addColumn(lambda row: row.code.uuid, label="uuid")
        result.addColumn(lambda row: row.code.id, label="code_id")
        self.add_label_columns(result)
        depth = max(row.indent for row in rows) + 1
        for i in range(depth):
            result.addColumn(TreeCodeColumn(i, language))

        return result
Example #11
def dict_to_columns(table,
                    rowheader_label="group",
                    rowheader_type=str,
                    cell_type=int):
    result = table3.ObjectTable(rows=table.getRows())
    result.addColumn(
        table3.ObjectColumn(label=rowheader_label,
                            cellfunc=lambda row: row,
                            fieldtype=rowheader_type))
    for col in table.getColumns():
        result.addColumn(
            table3.ObjectColumn(label=unicode(col),
                                cellfunc=partial(table.getValue, column=col),
                                fieldtype=cell_type))
    return result
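
A usage sketch (the `counts` table is hypothetical): given any table3 table whose rows are group labels, dict_to_columns wraps it with an explicit row-header column plus one typed column per original column.

    wide = dict_to_columns(counts, rowheader_label="group", cell_type=int)
    # wide has a "group" column (the row itself), then one int column per
    # column of `counts`, read via counts.getValue(row, column=col).
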
Example #12
def get_nukes(sentence, transformer, statements_without_object):
    """Return a sequence of statements extracted from the roles"""
    read_node = lambda n: int(n) if n.strip() else None
    roles = [(read_node(s), p.replace(AMCAT, ""), read_node(o)) for (s, p, o) in
             transformer.query(select=["?spos", "?p", "?opos"],
                               where="""?s ?p [:position ?opos] OPTIONAL {?s :position ?spos}
                                        FILTER (?p IN (:su, :obj, :quote, :eqv, :om))""")]

    statements = list(get_statements(sentence, roles, statements_without_object))
    statements = [fill_out_statement(sentence, statement, roles) for statement in statements]
    statements = list(resolve_equivalence(statements))
    statements = [add_frames(s) for s in statements]
	
    nuketable = table3.ObjectTable(rows=statements)

    nuketable.addColumn(lambda s: "/".join(s.type), "type")
    for col in "source", "subject", "predicate", "condition", "object":
        nuketable.addColumn(partial(Statement.get_lemmata, position=col), col)
    nuketable.addColumn(lambda s: s.frames, "frames")

    return nuketable
Example #13
    def get_response(self):
        r = self.read_network(self.options['network'])
        graph = self.get_graph(r)
        html = graph.getHTMLObject()
        dot = graph.getDot()
        edges = list(itertools.chain(*graph.edges.values()))
        t = table3.ObjectTable(rows=edges)

        def fmt(f, pattern="%1.1f"):
            if f is None: return ""
            return pattern % f

        t.addColumn(lambda e: e.subj.id, "subject")
        t.addColumn(lambda e: e.obj.id, "object")
        t.addColumn(lambda e: fmt(e.weight), "weight")
        t.addColumn(lambda e: fmt(e.sign, pattern="%+1.2f"), "quality")
        t.addColumn(lambda e: e.pred or "", "predicate")
        t.addColumn(lambda e: e.graph, "subgraph")

        html += tableoutput.table2html(t, printRowNames=False)

        html += "<pre>{dot}</pre>".format(**locals())
        return HttpResponse(html, status=200, mimetype="text/html")
Example #14
    def fast_tablize(self, data):
        if not isinstance(data, list):
            raise ValueError("fast_tablize needs a list of (nested) dicts!")            
        
        def _get_keys(item, prefix=()):
            for key, val in item.items():
                if isinstance(val, list):
                    raise ValueError("fast_tablize needs a list of (nested) dicts (not nested lists)!")            
                if isinstance(val, dict):
                    for nested_key, _type in _get_keys(val):
                        yield (key,) + nested_key, _type
                else:
                    yield (key,), type(val)    
                        
        def _get_val(d, key):
            val = d.get(key[0])
            if val is None or len(key) == 1:
                return val
            return _get_val(val, key[1:])

        keys = OrderedDict()
        for row in data:
            for key, _type in _get_keys(row):
                name = ".".join(key)
                if name in keys:
                    keys[name][1].add(_type)
                else:
                    keys[name] = (key, {_type})
                    
        table = table3.ObjectTable(rows=data)
        for col, (key, types) in keys.items():
            fieldtype = list(types)[0] if len(types) == 1 else None
            fieldtype = {bool: str, type(None): str}.get(fieldtype, fieldtype)
            table.add_column(label=col, col=partial(_get_val, key=key), fieldtype=fieldtype)

        return table
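
An illustrative call (again assuming an instance `r` of the surrounding class): nested dict keys turn into dotted column names, and uniform bool columns get their fieldtype mapped to str.

    data = [
        {"id": 1, "user": {"name": "alice", "admin": True}},
        {"id": 2, "user": {"name": "bob", "admin": False}},
    ]
    table = r.fast_tablize(data)
    # Columns: "id" (int), "user.name" (str), "user.admin" (bool -> str).
    # A list value anywhere raises ValueError, which tablize() in
    # Example #1 catches to fall back on flatten_data().
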
Example #15
if len(sys.argv) > 1:
    outdir = sys.argv[1]
    wwwroot = 'file://{outdir}'.format(**locals())
else:
    outdir = OUTDIR_DEFAULT
    wwwroot = WWWROOT_DEFAULT

log = amcatlogging.setup()

if not os.path.exists(outdir): os.makedirs(outdir)  # make sure target exists

script = sys.argv[0]
log.info("Starting documentation by {script} at {stamp}".format(**locals()))

doc = table3.ObjectTable()
test = table3.ObjectTable()

for reponame in REPONAMES:
    repolocation = REPOLOC.format(**locals())
    # clone the repository into a temporary directory
    tmpdir = tempfile.mkdtemp()
    repodir = '{tmpdir}/{reponame}'.format(**locals())
    log.info(
        "{reponame}: Cloning {repolocation} to {repodir}".format(**locals()))
    repo = hg.clone(repolocation, repodir)

    for branch in repo.listbranches():
        row = dict(repo=reponame, branch=branch)
        doc.rows.append(row)
Example #16
def index(request):
    # build table with gold standard sentences
    ruleset = request.GET.get('ruleset', '').lower()
    if ruleset: ruleset = "_" + ruleset

    goldfile = GOLDFILE.format(**locals())
    grammarfile = GRAMMARFILE.format(**locals())
    g, gold_relations = get_gold(goldfile)
    comments = get_gold_comments(goldfile)

    # if rules are modified, store current values
    grammar_modified = os.path.getmtime(grammarfile)
    store_score = request.session.get('grammartime', None) != grammar_modified
    request.session['grammartime'] = grammar_modified

    sentences = AnalysisSentence.objects.filter(pk__in=g.keys())

    metrics = {}  # (sentence_id, metric): score
    tt = get_tt(ruleset, gold_relations)
    for sentence in sentences:
        tt.load_sentence(sentence.id)
        tt.apply_lexical()
        tt.apply_rules()

        found = set(tt.get_roles())
        print "--->", found
        gold = g[sentence.id]
        gold = set(do_gold_reality(found, gold))
        tp = len(gold & found)
        fp = len(found - gold)
        fn = len(gold - found)
        pr = tp / float(tp + fp) if (tp + fp) else None
        re = tp / float(tp + fn) if (tp + fn) else None
        f = 2 * pr * re / (pr + re) if (pr or re) else 0
        if tp + fp + fn == 0: f = None
        for metric in "tp fp fn pr re f".split():
            metrics[sentence.id, metric] = locals()[metric]
        key = "semanticroles_fscore_%i" % sentence.id
        previous = request.session.get(key, None)
        metrics[sentence.id, "prev"] = "" if previous is None else previous
        metrics[sentence.id, "diff"] = "" if previous is None else colorize((f or 0) - previous)
        if store_score:
            request.session[key] = f

    sentencetable = table3.ObjectTable(rows=sentences)
    sentencetable.addColumn(
        lambda s: "<a href='{url}?ruleset={ruleset}'>{s.id}</a>".format(
            url=reverse('semanticroles-sentence', args=[s.id]),
            ruleset=ruleset[1:], s=s),
        "ID")
    sentencetable.addColumn(lambda s: unicode(s.sentence.sentence)[:60], "Sentence")
    sentencetable.addColumn(lambda s: "<br/>".join(comments.get(s.id, [])), "Remarks")
    def get_metric(metric, sentence):
        result = metrics[sentence.id, metric]
        if result is None: result = ""
        if isinstance(result, float): result = "%1.2f" % result
        return result
    for metric in ("tp", "fp", "fn", "f", "prev", "diff"):
        sentencetable.addColumn(partial(get_metric, metric), metric)

    sentencetablehtml = tableoutput.table2htmlDjango(sentencetable, safe=True)
   
    print grammar_modified, store_score
    
    return render(request, "navigator/semanticroles/index.html", locals())
Example #17
    writer = pspp.next()
    writer = EchoWriter(writer)
    log.debug("Creating SPS script and sending to PSPP")
    table2spss(t, writer=writer, saveas=filename)
    log.debug("Closing PSPP")
    out, err = pspp.next()
    log.debug("PSPP err: %s" % err)
    log.debug("PSPP out: %s" % out)
    err = err.replace('pspp: error creating "pspp.jnl": Permission denied', '')
    err = err.replace(
        'pspp: ascii: opening output file "pspp.list": Permission denied', '')
    if err.strip():
        raise Exception(err)
    if "error:" in out.lower():
        raise Exception("PSPP Exited with error: \n\n%s" % out)
    if not os.path.exists(filename):
        raise Exception(
            "PSPP Exited without errors, but file was not saved.\n\nOut=%r\n\nErr=%r"
            % (out, err))
    return filename


if __name__ == '__main__':
    db = dbtoolkit.amcatDB()
    cj = codingjob.CodingJob(db, 4534)
    t = table3.ObjectTable(rows=codingjob.getCodedSentencesFromCodingjobs([cj]),
                           columns=map(SPSSFieldColumn, cj.unitSchema.fields))

    print table2sav(t)