def test_invalid_row_is_skipped(self):
    """Check that the lone row ``a,b`` yields nothing from ``read_csv``."""
    # Request ERROR-level (40) root logging; messages up to WARNING (30)
    # are expected while the reader handles this input.
    logging.basicConfig(level=logging.ERROR)

    malformed = StringIO()
    malformed.write("a,b")
    malformed.seek(0)

    entities = list(CsvReader.read_csv(malformed))
    assert entities == []
 def setUp(self):
     """Parse three ``Q1`` rows and store the entities on ``self.result``."""
     TestCase.setUp(self)
     rows = [
         "Q1,373,string,Universe\n",
         "Q1,31,wikibase-entityid,Q223557\n",
         "Q1,31,wikibase-entityid,Q1088088\n",
     ]
     buf = StringIO()
     buf.write("".join(rows))
     # Rewind so the reader starts at the first row.
     buf.seek(0)
     self.result = list(CsvReader.read_csv(buf))
    def test_invalid_row_is_skipped(self):
        """Feed the single row ``a,b`` to ``read_csv`` and expect no output."""
        # Level 40 is logging.ERROR; log messages up to WARNING (30) are
        # expected for this input.
        logging.basicConfig(level=logging.ERROR)

        bad_input = StringIO()
        for row in ("a,b",):
            bad_input.write(row)
        bad_input.seek(0)

        parsed = list(CsvReader.read_csv(bad_input))
        assert parsed == []
 def setUp(self):
     """Build an in-memory CSV of three ``Q1`` rows and parse it.

     The parsed entities are kept on ``self.result`` for the tests.
     """
     TestCase.setUp(self)
     rows = (
         "Q1,373,string,Universe\n",
         "Q1,31,wikibase-entityid,Q223557\n",
         "Q1,31,wikibase-entityid,Q1088088\n",
     )
     stream = StringIO()
     for row in rows:
         stream.write(row)
     # Rewind before handing the stream to the reader.
     stream.seek(0)
     self.result = list(CsvReader.read_csv(stream))
    def test_unknown_type(self):
        """A row whose second field is ``unknown`` still yields entity ``Q1``."""
        # Level 40 is logging.ERROR; messages up to WARNING (30) are expected.
        logging.basicConfig(level=logging.ERROR)

        source = StringIO()
        source.write("Q1,unknown,373,string,Universe\n")
        source.seek(0)

        entities = list(CsvReader.read_csv(source))
        first = entities[0]
        assert 'Q1' == first.title
    def test_unknown_type(self):
        """Parse one row tagged ``unknown`` and check the first entity's title."""
        # Ask for ERROR-level (40) logging; anything up to WARNING (30) is
        # expected while reading this row.
        logging.basicConfig(level=logging.ERROR)

        rows = ["Q1,unknown,373,string,Universe\n"]
        stream = StringIO()
        stream.write("".join(rows))
        stream.seek(0)

        parsed = list(CsvReader.read_csv(stream))
        entity = parsed[0]
        assert 'Q1' == entity.title
    def test_multiple_entities(self):
        """Rows for ``Q1`` and ``Q2`` must come back as two entities, in order."""
        rows = [
            "Q1,claim,373,string,Universe\n",
            "Q2,claim,143,wikibase-item,Q328\n",
        ]
        source = StringIO()
        source.write("".join(rows))
        source.seek(0)

        entities = list(CsvReader.read_csv(source))

        assert 2 == len(entities)
        first, second = entities
        assert 'Q1' == first.title
        assert 'Q2' == second.title
Example #8
0
    def test_multiple_entities(self):
        """Rows for ``Q1`` and ``Q2`` must parse into two entities, in order."""
        rows = [
            "Q1,claim,373,string,Universe\n",
            "Q2,claim,143,wikibase-item,Q328\n",
        ]
        source = StringIO()
        source.write("".join(rows))
        source.seek(0)

        entities = list(CsvReader.read_csv(source))

        self.assertThat(entities, HasLength(2))
        # Titles are checked positionally, first Q1 and then Q2.
        for index, expected_title in enumerate(("Q1", "Q2")):
            self.assertThat(entities[index].title, Equals(expected_title))
Example #9
0
 def test_universe(self):
     """Parse the six-row ``Q1`` fixture and delegate to ``assert_universe``.

     The fixture mixes ``claim``, ``reference`` and ``qualifier`` rows for
     the single entity ``Q1``.
     """
     out = StringIO()
     # One list element per CSV row. The original was missing the comma
     # after the "reference" row, so that row and the next were merged by
     # implicit string-literal concatenation.
     out.writelines(["Q1,claim,373,string,Universe\n",
                     "Q1,reference,143,wikibase-item,Q328\n",
                     "Q1,claim,31,wikibase-item,Q223557\n",
                     "Q1,claim,31,wikibase-item,Q1088088\n",
                     "Q1,claim,361,wikibase-item,Q3327819\n",
                     "Q1,qualifier,31,wikibase-item,Q41719\n"])
     # Rewind so the reader starts at the first row.
     out.seek(0)
     result = list(CsvReader.read_csv(out))
     self.assert_universe(result)
    def test_multiple_entities(self):
        """Two rows with different ids (``Q1``, ``Q2``) yield two entities."""
        stream = StringIO()
        for row in ("Q1,claim,373,string,Universe\n",
                    "Q2,claim,143,wikibase-item,Q328\n"):
            stream.write(row)
        stream.seek(0)

        parsed = list(CsvReader.read_csv(stream))

        assert 2 == len(parsed)
        q1, q2 = parsed
        assert 'Q1' == q1.title
        assert 'Q2' == q2.title
    def test_universe(self):
        """Parse the six-row ``Q1`` fixture and check the resulting claims.

        Expects exactly one entity titled ``Q1`` whose ``claims`` contain
        four claims: the ``reference`` row shows up in the third ``Claim``
        argument of the claim before it, and the ``qualifier`` row in the
        second ``Claim`` argument of the claim before it.
        """
        out = StringIO()
        # One list element per CSV row. The original was missing the comma
        # after the "reference" row, so that row and the next were merged by
        # implicit string-literal concatenation.
        out.writelines(["Q1,claim,373,string,Universe\n",
                        "Q1,reference,143,wikibase-item,Q328\n",
                        "Q1,claim,31,wikibase-item,Q223557\n",
                        "Q1,claim,31,wikibase-item,Q1088088\n",
                        "Q1,claim,361,wikibase-item,Q3327819\n",
                        "Q1,qualifier,31,wikibase-item,Q41719\n"])
        # Rewind so the reader starts at the first row.
        out.seek(0)
        result = list(CsvReader.read_csv(out))
        assert 1 == len(result)
        q1 = result[0]

        assert "Q1" == q1.title
        assert (Claim(Snak(373, "string", "Universe"), [],
                      [Snak(143, "wikibase-item", "Q328")]) in
                q1.claims)
        assert Claim(Snak(31, "wikibase-item", "Q223557")) in q1.claims
        assert Claim(Snak(31, "wikibase-item", "Q1088088")) in q1.claims
        assert (Claim(Snak(361, "wikibase-item", "Q3327819"),
                      [Snak(31, "wikibase-item", "Q41719")], []) in
                q1.claims)
    def test_universe(self):
        """Parse the six-row ``Q1`` fixture and check the resulting claims.

        Expects exactly one entity titled ``Q1`` whose ``claims`` contain
        four claims: the ``reference`` row shows up in the third ``Claim``
        argument of the claim before it, and the ``qualifier`` row in the
        second ``Claim`` argument of the claim before it.
        """
        out = StringIO()
        # One list element per CSV row. The original was missing the comma
        # after the "reference" row, so that row and the next were merged by
        # implicit string-literal concatenation.
        out.writelines([
            "Q1,claim,373,string,Universe\n",
            "Q1,reference,143,wikibase-item,Q328\n",
            "Q1,claim,31,wikibase-item,Q223557\n",
            "Q1,claim,31,wikibase-item,Q1088088\n",
            "Q1,claim,361,wikibase-item,Q3327819\n",
            "Q1,qualifier,31,wikibase-item,Q41719\n",
        ])
        # Rewind so the reader starts at the first row.
        out.seek(0)
        result = list(CsvReader.read_csv(out))
        assert 1 == len(result)
        q1 = result[0]

        assert "Q1" == q1.title
        assert (Claim(Snak(373, "string", "Universe"), [],
                      [Snak(143, "wikibase-item", "Q328")]) in q1.claims)
        assert Claim(Snak(31, "wikibase-item", "Q223557")) in q1.claims
        assert Claim(Snak(31, "wikibase-item", "Q1088088")) in q1.claims
        assert (Claim(Snak(361, "wikibase-item", "Q3327819"),
                      [Snak(31, "wikibase-item", "Q41719")], []) in q1.claims)
import time

from propertysuggester.analyzer import CsvGenerator, RuleGenerator
from propertysuggester.parser import CsvReader
from propertysuggester.utils.CompressedFileType import CompressedFileType

if __name__ == "__main__":
    # Script entry point: read the CSV dump, compute rules, write the pair CSV.
    # Level 20 is logging.INFO, so the progress messages below are printed.
    logging.basicConfig(level=logging.INFO)

    parser = argparse.ArgumentParser(
        description=(
            "this program generates a correlation-table from the csv-dump"
        )
    )
    parser.add_argument(
        "input",
        type=CompressedFileType('rb'),
        help="The CSV input file (wikidata triple)",
    )
    # nargs='?' makes the output argument optional; sys.stdout is the fallback.
    parser.add_argument(
        "output",
        nargs='?',
        default=sys.stdout,
        type=CompressedFileType('wb'),
        help="The CSV output file (default=sys.stdout)",
    )
    args = parser.parse_args()

    start = time.time()
    logging.info("computing table")
    entities = CsvReader.read_csv(args.input)
    rules = RuleGenerator.compute_rules(entities)
    logging.info("writing csv")
    CsvGenerator.create_pair_csv(rules, args.output)
    elapsed = time.time() - start
    logging.info("done - {0:.2f}s".format(elapsed))
    logging.info(
        "now import this csv file with "
        "PropertySuggester/maintenance/UpdateTable.php"
    )
Example #14
0
import logging
import argparse
import sys
import time

from propertysuggester.analyzer import CsvGenerator, RuleGenerator
from propertysuggester.parser import CsvReader
from propertysuggester.utils.CompressedFileType import CompressedFileType

if __name__ == "__main__":
    # Script entry point: read the CSV dump, compute rules, write the pair CSV.
    # Level 20 is logging.INFO, so the progress messages below are printed.
    logging.basicConfig(level=logging.INFO)

    parser = argparse.ArgumentParser(
        description="this program generates a correlation-table "
                    "from the csv-dump")
    parser.add_argument("input",
                        type=CompressedFileType('rb'),
                        help="The CSV input file (wikidata triple)")
    # nargs='?' makes the output argument optional; sys.stdout is the fallback.
    parser.add_argument("output",
                        nargs='?',
                        default=sys.stdout,
                        type=CompressedFileType('wb'),
                        help="The CSV output file (default=sys.stdout)")
    args = parser.parse_args()

    start = time.time()
    logging.info("computing table")
    parsed_entities = CsvReader.read_csv(args.input)
    rules = RuleGenerator.compute_rules(parsed_entities)
    logging.info("writing csv")
    CsvGenerator.create_pair_csv(rules, args.output)
    duration = time.time() - start
    logging.info("done - {0:.2f}s".format(duration))
    logging.info("now import this csv file with "
                 "PropertySuggester/maintenance/UpdateTable.php")