Ejemplo n.º 1
0
 def get_doc_line(self, doc, line):
     """Return the second tab-separated field of one row of document ``doc``.

     The document text comes from ``self.db.get_doc_lines(doc)`` and is
     treated as newline-separated rows whose fields are tab-separated.

     Args:
         doc: Identifier handed to ``self.db.get_doc_lines``.
         line: Row index. When it is greater than -1 that row is used
             directly; otherwise a row is picked at random.

     Returns:
         The second field of the selected row.
     """
     raw = self.db.get_doc_lines(doc)

     if line > -1:
         row = raw.split("\n")[line]
         return row.split("\t")[1]

     # Random pick: only rows that have a second field with non-blank
     # content are eligible.
     candidates = []
     for row in raw.split("\n"):
         fields = row.split("\t")
         if len(fields) > 1 and len(fields[1].strip()):
             candidates.append(fields[1])

     # NOTE(review): the bounds passed to next_rand are presumably inclusive
     # (hence the "- 1") -- confirm against SimpleRandom.
     pick = SimpleRandom.get_instance().next_rand(0, len(candidates) - 1)
     return candidates[pick]
Ejemplo n.º 2
0
    def get_doc_line(self, doc, line):
        """Return the second tab-separated field of one row of document ``doc``.

        The document text comes from ``self.doc_db.get_doc_lines(doc)`` and
        is treated as newline-separated rows whose fields are tab-separated.

        When the ``PERMISSIVE_EVIDENCE`` environment variable is one of
        ``y``, ``yes``, ``true``, ``t`` or ``1`` (case-insensitive) and the
        lookup returned ``None``, an empty string is returned. Outside that
        mode a ``None`` lookup is not handled here.

        Args:
            doc: Identifier handed to ``self.doc_db.get_doc_lines``.
            line: Row index. When it is greater than -1 that row is used
                directly; otherwise a row is picked at random.

        Returns:
            The second field of the selected row, or ``""`` in the
            permissive case described above.
        """
        text = self.doc_db.get_doc_lines(doc)

        flag = os.getenv("PERMISSIVE_EVIDENCE", "n").lower()
        permissive = flag in ["y", "yes", "true", "t", "1"]
        if permissive and text is None:
            return ""

        rows = text.split("\n")
        if line > -1:
            return rows[line].split("\t")[1]

        # Random pick: only rows that have a second field with non-blank
        # content are eligible.
        usable = []
        for row in rows:
            cols = row.split("\t")
            if len(cols) > 1 and len(cols[1].strip()):
                usable.append(cols[1])

        # NOTE(review): the bounds passed to next_rand are presumably
        # inclusive (hence the "- 1") -- confirm against SimpleRandom.
        chosen = SimpleRandom.get_instance().next_rand(0, len(usable) - 1)
        return usable[chosen]
Ejemplo n.º 3
0
def evidence_num_to_text(db: Union[Dict, FeverDocDB],
                         page_id: str,
                         line: int,
                         is_snopes: bool = False):
    """Return the text of one evidence row for ``page_id``.

    For Snopes data the call is delegated to ``evidence_num_to_text_snopes``.
    Otherwise the document text from ``db.get_doc_lines(page_id)`` is treated
    as newline-separated rows of tab-separated fields, and the second field
    of the selected row is returned.

    Args:
        db: A dictionary when ``is_snopes`` is true, a ``FeverDocDB``
            otherwise (both checked with ``assert``).
        page_id: Identifier of the document to read.
        line: Row index. When it is greater than -1 that row is used
            directly; otherwise a row is picked at random.
        is_snopes: Selects the Snopes code path.

    Returns:
        The selected text, or ``""`` when the database returns ``None`` for
        ``page_id``.
    """
    assert isinstance(db, Dict) or not is_snopes, \
        "db should be dictionary for Snopes data"
    assert isinstance(db, FeverDocDB) or is_snopes, \
        "db should be fever DB for fever data"
    # Not used below; retained in case creating the logger has side effects
    # -- TODO confirm and remove if it does not.
    logger = LogHelper.get_logger("evidence_num_to_text")

    if is_snopes:
        return evidence_num_to_text_snopes(db, page_id, line)

    doc_text = db.get_doc_lines(page_id)
    if doc_text is None:
        return ""

    rows = doc_text.split("\n")
    if line > -1:
        return rows[line].split("\t")[1]

    # Random pick: only rows that have a second field with non-blank content
    # are eligible.
    split_rows = (row.split("\t") for row in rows)
    texts = [
        cols[1] for cols in split_rows
        if len(cols) > 1 and len(cols[1].strip())
    ]
    # NOTE(review): the bounds passed to next_rand are presumably inclusive
    # (hence the "- 1") -- confirm against SimpleRandom.
    chosen = SimpleRandom.get_instance().next_rand(0, len(texts) - 1)
    return texts[chosen]
Ejemplo n.º 4
0
from retrieval.fever_doc_db import FeverDocDB
from retrieval.filter_uninformative import uninformative

# Command-line interface: one positional argument, the path to the database.
parser = argparse.ArgumentParser()
parser.add_argument('db_path', type=str, help='/path/to/fever.db')

args = parser.parse_args()

jlr = JSONLineReader()

docdb = FeverDocDB(args.db_path)

# Candidate pages for sampling: the ids returned by get_non_empty_doc_ids(),
# minus those that uninformative() flags.
idx = docdb.get_non_empty_doc_ids()
idx = list(filter(lambda item: not uninformative(item), tqdm(idx)))

r = SimpleRandom.get_instance()

# Copy the test split, replacing the evidence of every "NOT ENOUGH INFO"
# record with a randomly chosen page and a line index of -1.
with open("data/fever/test.ns.rand.jsonl", "w+") as f:
    for line in jlr.read("data/fever-data/test.jsonl"):
        if line["label"] == "NOT ENOUGH INFO":

            for evidence_group in line['evidence']:
                for evidence in evidence_group:
                    # The upper bound is len(idx) - 1, matching the other
                    # next_rand call sites that index a list with the result.
                    # Passing len(idx) can yield an index one past the end
                    # when the bound is inclusive.
                    evidence[2] = idx[r.next_rand(0, len(idx) - 1)]
                    evidence[3] = -1

        f.write(json.dumps(line) + "\n")

with open("data/fever/dev.ns.rand.jsonl", "w+") as f:
    for line in jlr.read("data/fever-data/dev.jsonl"):
        if line["label"] == "NOT ENOUGH INFO":