def test_generate_columns(self):
        """Compare Dataset.generate_columns with a verbose reference version"""
        def generate_columns_longer(ds):
            """Spelled-out reference implementation of the column generation.

            Every column starts as a [number, header, title] row with an
            empty header and title. Each header and title entry then writes
            its stripped content into as many consecutive rows as its
            colspan says. Finally one "910" field is added to the record
            for every column.
            """
            from invenio.bibrecord import record_add_field

            def spread(cells, slot, table):
                """Write each cell's stripped content into `slot` of the
                next `colspan` rows of `table`, moving left to right."""
                position = 0
                for cell in cells:
                    for _ in xrange(cell["colspan"]):
                        table[position][slot] = cell["content"].strip()
                        position += 1

            record = {}
            table = [[number, "", ""] for number in xrange(ds.num_columns)]
            spread(ds.column_headers, 1, table)
            spread(ds.column_titles, 2, table)

            for number, header, title in table:
                # the number is always emitted; the title and the header
                # are emitted (in that order) only when they are non-empty
                optional = (("t", title), ("d", header))
                subfields = [("n", str(number))]
                subfields.extend([pair for pair in optional if pair[1] != ""])
                record_add_field(record, "910", subfields=subfields)
            return record

        # the colspans deliberately cover fewer columns than num_columns,
        # so the trailing columns keep an empty header and/or title
        dataset = Dataset()
        dataset.column_headers = [
            {"content": "header1", "colspan": 1},
            {"content": "header2", "colspan": 3},
        ]
        dataset.column_titles = [
            {"content": "title1", "colspan": 2},
            {"content": "title2", "colspan": 1},
        ]
        dataset.num_columns = 6

        produced = dataset.generate_columns()
        reference = generate_columns_longer(dataset)
        self.assertEqual(produced, reference, "Incorrectly generated columns")
    def test_parse_record(self):
        """Tests building record form the MARC XML"""
        rec_string = """<record>
  <datafield tag="245" ind1=" " ind2=" ">
    <subfield code="a">This is the caption</subfield>
    <subfield code="9">HEPDATA</subfield>
  </datafield>
  <datafield tag="520" ind1=" " ind2=" ">
    <subfield code="9">HEPDATA</subfield>
  </datafield>
  <datafield tag="710" ind1=" " ind2=" ">
    <subfield code="g">ATLAS</subfield>
  </datafield>
  <datafield tag="786" ind1=" " ind2=" ">
    <subfield code="w">214657</subfield>
    <subfield code="r">arXiv:something</subfield>
    <subfield code="h">F1</subfield>
  </datafield>
  <datafield tag="336" ind1=" " ind2=" ">
    <subfield code="t">DATASET</subfield>
  </datafield>
  <datafield tag="980" ind1=" " ind2=" ">
    <subfield code="a">DATA</subfield>
  </datafield>
  <!-- definitions of columns -->
  <datafield tag="911" ind1=" " ind2=" ">
    <subfield code="x">1</subfield>
    <subfield code="y">2</subfield>
  </datafield>

  <datafield tag="910" ind1=" " ind2=" ">
    <subfield code="t">column title</subfield>
    <subfield code="d">column description</subfield>
    <subfield code="n">0</subfield>
  </datafield>

  <datafield tag="910" ind1=" " ind2=" ">
    <subfield code="t">title2</subfield>
    <subfield code="d">column description</subfield>
    <subfield code="n">1</subfield>
  </datafield>

  <datafield tag="910" ind1=" " ind2=" ">
    <subfield code="t">title2</subfield>
    <subfield code="d">description2</subfield>
    <subfield code="n">2</subfield>
  </datafield>

  <!-- encoding data qualifiers -->
  <datafield tag="653" ind1="1" ind2=" ">
    <subfield code="r">1</subfield>
    <subfield code="c">0</subfield>
    <subfield code="c">1</subfield>
  </datafield>
  <datafield tag="653" ind1="1" ind2=" ">
    <subfield code="k">m</subfield>
    <subfield code="v">v</subfield>
    <subfield code="c">2</subfield>
  </datafield>
  <datafield tag="653" ind1="1" ind2=" ">
    <subfield code="k">p</subfield>
    <subfield code="v">q</subfield>
    <subfield code="c">0</subfield>
  </datafield>
  <datafield tag="653" ind1="1" ind2=" ">
    <subfield code="k">w</subfield>
    <subfield code="v">g</subfield>
    <subfield code="c">1</subfield>
    <subfield code="c">2</subfield>
  </datafield>
  <datafield tag="653" ind1="1" ind2=" ">
    <subfield code="r">2</subfield>
    <subfield code="c">0</subfield>
    <subfield code="c">1</subfield>
  </datafield>
  <datafield tag="653" ind1="1" ind2=" ">
    <subfield code="k">z</subfield>
    <subfield code="v">v</subfield>
    <subfield code="c">2</subfield>
  </datafield>
</record>
"""
        rec = bibrecord.create_record(rec_string)
#        print "The record string: %s\n" % (str(rec), )

        ds = Dataset.create_from_record(rec[0], '(l.', 214657, "")
#        print str(ds.data_qualifiers)

        self.assertEqual(3, ds.num_columns, \
                             "Incorrect number of columns has been read")


        # asserting column titles:
        self.assertEqual(2, len(ds.column_headers), "Incorrect number of headers")
        self.assertEqual("column title", ds.column_titles[0]["content"], \
                         "Incorrect content of the first column")
        self.assertEqual(1, ds.column_titles[0]["colspan"], \
                             "Incorrect colspan of the title of first column")
        self.assertEqual("title2", ds.column_titles[1]["content"], \
                             "Incorrect content of the second and third column")
        self.assertEqual(2, ds.column_titles[1]["colspan"], \
                             "Incorrect colspan of the title of second and " + \
                             "third column")

        # asserting on column descriptions

        self.assertEqual(2, len(ds.column_titles), \
                         "Incorrect number of column titles")
        self.assertEqual("column description", ds.column_headers[0]["content"], \
                             "Incorrect description of the first andsecond" + \
                             " column")
        self.assertEqual(2, ds.column_headers[0]["colspan"], \
                             "Incorrect colspan of the description first" + \
                             " and second column")

        self.assertEqual("description2", ds.column_headers[1]["content"], \
                             "Incorrect description of the third column")
        self.assertEqual(1, ds.column_headers[1]["colspan"], \
                             "Incorrect colspan of the description thirdcolumn")

        self.assertEqual(3, len(ds.data_qualifiers), \
                             "Incorrect number of detected dscriptor rows")

        existing_qual = []
        for q_line in ds.data_qualifiers:
            l_pos = 0
            for q in q_line:
                existing_qual.append((l_pos, q))
                l_pos += 1

        self.assertTrue((0, {"content": "RE : 1", "colspan" : 2 }) in \
                            existing_qual)
        self.assertTrue((1, {"content": "m : v", "colspan" : 1 }) in \
                            existing_qual)
        self.assertTrue((0, {"content": "p : q", "colspan" : 1 }) in \
                            existing_qual)
        self.assertTrue((1, {"content": "w : g", "colspan" : 2 }) in \
                            existing_qual)
        self.assertTrue((0, {"content": "RE : 2", "colspan" : 2 }) in \
                            existing_qual)
        self.assertTrue((1, {"content": "z : v", "colspan" : 1 }) in \
                            existing_qual)


        # now testing the comparison function ... on the same dataset
        ds2 = Dataset.create_from_record(rec[0], '(l.', 214657, "")