Beispiel #1
0
    # ".tr" amateur/expert file; the ".tr"/".dv"/".te" suffixes presumably
    # denote train/dev/test splits -- confirm against the data directory.
    waseem_hovy_tr = os.path.join("data", "amateur_expert.tr.json")

    # ".dv" files, in sexism / racism / neither / amateur-expert order.
    sexism_file_de, racism_file_de, neither_file_de, waseem_hovy_de = (
        os.path.join("data", filename)
        for filename in (
            "waseem_s.dv.json",
            "waseem_r.dv.json",
            "waseem_n.dv.json",
            "amateur_expert.dv.json",
        )
    )

    # ".te" files, in the same order as above.
    sexism_file_te, racism_file_te, neither_file_te, waseem_hovy_te = (
        os.path.join("data", filename)
        for filename in (
            "waseem_s.te.json",
            "waseem_r.te.json",
            "waseem_n.te.json",
            "amateur_expert.te.json",
        )
    )

    # One reader per input format; only the CSV reader is given an encoding.
    csvreader = CSVReader(encoding="ISO-8859-1")
    jlr = JSONLineReader()

    # Every formatter receives the same `pp` preprocessing object.
    formatter = TextAnnotationFormatter(WaseemLabelSchema(), preprocessing=pp)
    # Labels 0-2 map to themselves; label 3 is folded into label 0.
    formatter2 = TextAnnotationFormatter(
        WaseemHovyLabelSchema(),
        preprocessing=pp,
        mapping={0: 0, 1: 1, 2: 2, 3: 0},
    )
    # Identity mapping over labels 0-2.
    df = DavidsonFormatter(
        DavidsonToZLabelSchema(),
        preprocessing=pp,
        mapping={0: 0, 1: 1, 2: 2},
    )
Beispiel #2
0
    # ".tr" amateur/expert file; the remaining ".tr" paths used further down
    # (sexism_file_tr, racism_file_tr, neither_file_tr) come from outside
    # this excerpt.
    waseem_hovy_tr = os.path.join("data", "amateur_expert.tr.json")

    # ".dv" files.
    sexism_file_de = os.path.join("data", "waseem_s.dv.json")
    racism_file_de = os.path.join("data", "waseem_r.dv.json")
    neither_file_de = os.path.join("data", "waseem_n.dv.json")
    waseem_hovy_de = os.path.join("data", "amateur_expert.dv.json")

    # ".te" files.
    sexism_file_te = os.path.join("data", "waseem_s.te.json")
    racism_file_te = os.path.join("data", "waseem_r.te.json")
    neither_file_te = os.path.join("data", "waseem_n.te.json")
    waseem_hovy_te = os.path.join("data", "amateur_expert.te.json")

    # One reader per input format; only the CSV reader is given an encoding.
    csvreader = CSVReader(encoding="ISO-8859-1")
    jlr = JSONLineReader()

    # Every formatter receives the same `pp` preprocessing object.
    formatter = TextAnnotationFormatter(
        WaseemLabelSchema(),
        preprocessing=pp,
    )
    # Labels 0-2 map to themselves; label 3 is folded into label 0.
    formatter2 = TextAnnotationFormatter(
        WaseemHovyLabelSchema(),
        preprocessing=pp,
        mapping={
            0: 0,
            1: 1,
            2: 2,
            3: 0,
        },
    )
    # Identity mapping over labels 0-2.
    df = DavidsonFormatter(
        DavidsonToZLabelSchema(),
        preprocessing=pp,
        mapping={
            0: 0,
            1: 1,
            2: 2,
        },
    )

    # Datasets built from the ".tr" files. The three per-class files share
    # `formatter`, the amateur/expert file uses `formatter2`, and the
    # Davidson CSV is the only entry with an explicit name.
    datasets_tr = [
        DataSet(
            file=sexism_file_tr,
            reader=jlr,
            formatter=formatter,
            name=None,
        ),
        DataSet(
            file=racism_file_tr,
            reader=jlr,
            formatter=formatter,
            name=None,
        ),
        DataSet(
            file=neither_file_tr,
            reader=jlr,
            formatter=formatter,
            name=None,
        ),
        DataSet(
            file=waseem_hovy_tr,
            reader=jlr,
            formatter=formatter2,
            name=None,
        ),
        DataSet(
            os.path.join("data", "davidson.tr.csv"),
            reader=csvreader,
            formatter=df,
            name="davidson_train",
        ),
    ]

    datasets_de = [
        DataSet(file=sexism_file_de, reader=jlr, formatter=formatter,name=None),
        DataSet(file=racism_file_de, reader=jlr, formatter=formatter,name=None),
        DataSet(file=neither_file_de, reader=jlr, formatter=formatter,name=None),