Example #1
def test_hexdump():
    """
    Routine hexdump() reads one file and hexdumps it to another.
    """
    pytest.debug_func()
    exp = "\n".join([
        " 54 77 61 73  20 62 72 69  6c 6c 69 67  20 61 6e 64    "
        "Twas bri llig and",
        " 20 74 68 65  20 73 6c 69  74 68 65 20  74 6f 76 65    "
        " the sli the tove",
        " 73 0a 44 69  64 20 67 79  72 65 20 61  6e 64 20 67    "
        "s.Did gy re and g",
        " 69 6d 62 6c  65 20 72 6f  75 6e 64 20  74 68 65 20    "
        "imble ro und the ",
        " 77 61 62 65  0a 41 6c 6c  20 6d 69 6d  73 79 20 77    "
        "wabe.All  mimsy w",
        " 65 72 65 20  74 68 65 20  62 6f 72 6f  67 72 6f 76    "
        "ere the  borogrov",
        " 65 73 0a 41  6e 64 20 74  68 65 20 6d  6f 6d 65 20    "
        "es.And t he mome ",
        " 72 61 74 68  73 20 6f 75  74 67 72 61  62 65 0a       "
        "raths ou tgrabe. ",
        ""
        ])

    q = sio("\n".join(["Twas brillig and the slithe toves",
                       "Did gyre and gimble round the wabe",
                       "All mimsy were the borogroves",
                       "And the mome raths outgrabe\n"]))
    z = sio()
    hd.hexdump(q, z)
    result = z.getvalue()
    z.close()
    q.close()
    assert exp == result
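
Note: these snippets come from different projects, but all of them assume `sio` is bound to an in-memory file class. A minimal sketch of the assumed import (Python 2, matching most examples here; the `from StringIO import StringIO as sio` form appears verbatim in Examples #24, #25, #35 and #42):

    # Python 2, as used throughout these examples
    from StringIO import StringIO as sio

    # Python 3 equivalent: io.StringIO for text, io.BytesIO for bytes
    # from io import StringIO as sio
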
Example #2
    def test_readGraph_kthlist_non_dag(self):

        self.assertRaises(ValueError, readGraph, sio(kthlist_non_dag), graph_type="digraph")
        self.assertRaises(ValueError, readGraph, sio(kthlist_non_dag), graph_type="dag", file_format="kthlist")
        G = readGraph(sio(kthlist_non_dag), graph_type="digraph", file_format="kthlist")
        self.assertEqual(G.order(), 3)
        self.assertEqual(len(G.edges()), 3)
Example #3
    def test_readGraph_kthlist_non_bipartite(self):

        self.assertRaises(ValueError, readGraph, sio(kthlist_non_bipartite), graph_type="bipartite")
        self.assertRaises(
            ValueError, readGraph, sio(kthlist_non_bipartite), graph_type="bipartite", file_format="kthlist"
        )
        G = readGraph(sio(kthlist_non_bipartite), graph_type="simple", file_format="kthlist")
        self.assertEqual(G.order(), 5)
        self.assertEqual(len(G.edges()), 5)
Example #4
    def test_readGraph_dot_path2(self):

        if "dot" not in supported_formats()["simple"]:
            self.skipTest("No support for Dot file I/O.")

        self.assertRaises(ValueError, readGraph, sio(dot_path2), graph_type="simple")
        G = readGraph(sio(dot_path2), graph_type="simple", file_format="dot")
        self.assertEqual(G.order(), 3)
        self.assertEqual(len(G.edges()), 2)
Example #5
    def test_readGraph_gml_path2(self):

        self.assertRaises(ValueError,
                          readGraph,
                          sio(gml_path2),
                          graph_type='simple')
        G = readGraph(sio(gml_path2), graph_type='simple', file_format='gml')
        self.assertEqual(G.order(), 3)
        self.assertEqual(len(G.edges()), 2)
Example #6
 def csv_rows(partitionNumber, rows):
     rowCount = 0
     inputStr = "\n".join(rows)
     if partitionNumber == 0:
         return iter([pandas.read_csv(sio(inputStr), *args, header=None,
                                      names=mynames, skiprows=_skiprows,
                                      **kwargs)])
     else:
         # could use .iterrows() instead?
         return iter([pandas.read_csv(sio(inputStr), *args, header=None,
                                      names=mynames, **kwargs)])
Example #7
    def test_readGraph_dot_path2(self):

        if 'dot' not in supported_formats()['simple']:
            self.skipTest("No support for Dot file I/O.")

        self.assertRaises(ValueError,
                          readGraph,
                          sio(dot_path2),
                          graph_type='simple')
        G = readGraph(sio(dot_path2), graph_type='simple', file_format='dot')
        self.assertEqual(G.order(), 3)
        self.assertEqual(len(G.edges()), 2)
Example #8
    def test_non_symmetric_input_wrong(self):
        """Symmetric encoding on non-symmetric graph

        The formula in this test uses the symmetric encoding for a
        non-symmetric graph. This causes the formula to be unsatisfiable,
        even though it should be SAT.

        """
        G = readGraph(sio(example1), "simple", file_format="kthlist")
        T = readGraph(sio(example1alt), "simple", file_format="kthlist")
        F = SubgraphFormula(G, [T], symmetric=True)  # This should cause the wrong answer
        self.assertUNSAT(F)
Example #9
    def test_non_symmetric_input_right(self):
        """Symmetric encoding on non-symmetric graph

        The formula in this test uses the NON-symmetric encoding for
        a non-symmetric graph. This causes the formula to be
        satisfiable, as it should be.

        """
        G = readGraph(sio(example1), "simple", file_format="kthlist")
        T = readGraph(sio(example1alt), "simple", file_format="kthlist")
        F = SubgraphFormula(G, [T])
        self.assertSAT(F)
Example #10
    def test_non_symmetric_input_right(self):
        """Symmetric encoding on non-symmetric graph

        The formula in this test uses the NON-symmetric encoding for
        a non-symmetric graph. This causes the formula to be
        satisfiable, as it should be.

        """
        G = readGraph(sio(example1), "simple", file_format="kthlist")
        T = readGraph(sio(example1alt), "simple", file_format="kthlist")
        F = SubgraphFormula(G, [T])
        self.assertSAT(F)
Example #11
    def try_compress_decompress(self, data):
        source = sio(data)
        dest = sio()
        encode_file(source, dest)
        dest.seek(0)
        
        decoded = sio()
        decode_file(dest, decoded)

        decoded_data = decoded.getvalue()
        self.assertEqual(len(data), len(decoded_data))
        self.assertEqual(data, decoded_data)
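
A hedged sketch of driving the round-trip helper above (the test method name and inputs are illustrative; `encode_file`/`decode_file` are assumed to be the module's compressor pair, as the helper implies):

    # inside the same TestCase as try_compress_decompress
    def test_roundtrip_examples(self):
        self.try_compress_decompress("")              # empty input
        self.try_compress_decompress("abracadabra")   # repeated substrings
        self.try_compress_decompress("\x00" * 1024)   # long run of one byte
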
Example #12
    def test_non_symmetric_input_wrong(self):
        """Symmetric encoding on non-symmetric graph

        The formula in this test uses the symmetric encoding for a
        non-symmetric graph. This causes the formula to be unsatisfiable,
        even though it should be SAT.

        """
        G = readGraph(sio(example1), "simple", file_format="kthlist")
        T = readGraph(sio(example1alt), "simple", file_format="kthlist")
        F = SubgraphFormula(
            G, [T], symmetric=True)  # This should cause the wrong answer
        self.assertUNSAT(F)
Example #13
 def csv_rows(partition_number, rows):
     # pylint: disable=unexpected-keyword-arg
     in_str = "\n".join(rows)
     if partition_number == 0:
         return iter([
             pandas.read_csv(
                 sio(in_str), *args, header=None,
                 names=mynames,
                 skiprows=_skiprows,
                 **kwargs)])
     else:
         # could use .iterrows() instead?
         return iter([pandas.read_csv(sio(in_str), *args, header=None,
                                      names=mynames, **kwargs)])
Example #14
 def csv_rows(partition_number, rows):
     # pylint: disable=unexpected-keyword-arg
     in_str = "\n".join(rows)
     if partition_number == 0:
         return iter([
             pandas.read_csv(
                 sio(in_str), *args, header=None,
                 names=mynames,
                 skiprows=_skiprows,
                 **kwargs)])
     else:
         # could use .iterrows() instead?
         return iter([pandas.read_csv(sio(in_str), *args, header=None,
                                      names=mynames, **kwargs)])
Example #15
 def csv_file(partitionNumber, files):
     file_count = 0
     for filename, contents in files:
         # Only skip lines on the first file
         if partitionNumber == 0 and file_count == 0 and _skiprows > 0:
             yield pandas.read_csv(sio(contents), *args,
                                   header=None,
                                   names=mynames,
                                   skiprows=_skiprows, **kwargs)
         else:
             file_count += 1
             yield pandas.read_csv(sio(contents), *args,
                                   header=None,
                                   names=mynames,
                                   **kwargs)
Example #16
    def test_readGraph_kthlist_non_bipartite(self):

        self.assertRaises(ValueError,
                          readGraph,
                          sio(kthlist_non_bipartite),
                          graph_type='bipartite')
        self.assertRaises(ValueError,
                          readGraph,
                          sio(kthlist_non_bipartite),
                          graph_type='bipartite',
                          file_format='kthlist')
        G = readGraph(sio(kthlist_non_bipartite),
                      graph_type='simple',
                      file_format='kthlist')
        self.assertEqual(G.order(), 5)
        self.assertEqual(len(G.edges()), 5)
Example #17
    def test_readGraph_kthlist_bipartite(self):

        G = readGraph(sio(kthlist_bipartite), graph_type="bipartite", file_format="kthlist")
        self.assertEqual(G.order(), 5)
        L, R = bipartite_sets(G)
        self.assertEqual(len(L), 2)
        self.assertEqual(len(R), 3)
Example #18
def _hostmaskPatternEqual(pattern, hostmask):
    try:
        return _patternCache[pattern](hostmask) is not None
    except KeyError:
        # We make our own regexps, rather than use fnmatch, because fnmatch's
        # case-insensitivity is not IRC's case-insensitity.
        fd = sio()
        for c in pattern:
            if c == '*':
                fd.write('.*')
            elif c == '?':
                fd.write('.')
            elif c in '[{':
                fd.write('[[{]')
            elif c in '}]':
                fd.write(r'[}\]]')
            elif c in '|\\':
                fd.write(r'[|\\]')
            elif c in '^~':
                fd.write('[~^]')
            else:
                fd.write(re.escape(c))
        fd.write('$')
        f = re.compile(fd.getvalue(), re.I).match
        _patternCache[pattern] = f
        return f(hostmask) is not None
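
A quick usage sketch, assuming the two module-level names the snippet relies on (`re` imported, `_patternCache` a plain dict): the wildcard pattern is compiled once, cached, and then matched case-insensitively.

    import re
    _patternCache = {}  # assumed: module-level cache of compiled matchers

    # '*' becomes '.*' and '?' becomes '.', so a ban-style mask matches
    # any nick and user on the given domain.
    assert _hostmaskPatternEqual('*!*@*.example.com',
                                 'nick!user@irc.example.com')
    assert not _hostmaskPatternEqual('*!*@*.example.com',
                                     'nick!user@example.org')
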
Example #19
    def test_readGraph_kthlist_non_dag(self):

        self.assertRaises(ValueError,
                          readGraph,
                          sio(kthlist_non_dag),
                          graph_type='digraph')
        self.assertRaises(ValueError,
                          readGraph,
                          sio(kthlist_non_dag),
                          graph_type='dag',
                          file_format='kthlist')
        G = readGraph(sio(kthlist_non_dag),
                      graph_type='digraph',
                      file_format='kthlist')
        self.assertEqual(G.order(), 3)
        self.assertEqual(len(G.edges()), 3)
Example #20
    def send_xml(self, xml_content):
        """ Method used to send a given xml file to the switches
        """
        req = pycurl.Curl()

        # ignore ssl certificate verification
        if self.method == 'https':
            req.setopt(req.SSL_VERIFYPEER, 0)
            req.setopt(req.SSL_VERIFYHOST, 0)

        #set url being used
        req.setopt(req.URL, self._create_url())

        ziped = sio()
        with gzip.GzipFile(fileobj=ziped, mode='w') as gzip_file:
            gzip_file.write(xml_content)

        run_data = ziped.getvalue()

        #sets necessary multipart fields and adds the zip from buffer
        data = [('page', 'file_upload'),
               ('running_part', '1'),
               ('file_to_upload', (req.FORM_BUFFER, 'upate_config',
                                  req.FORM_BUFFERPTR, run_data))]

        #sets POST method and the multipart packet
        req.setopt(req.HTTPPOST, data)

        #executes curl and exits
        req.perform()
        req.close()
Example #21
def get():
    output = sio()
    output.write('GET\n')
    output.write('args: {}\n'.format(request.args))
    content = output.getvalue()
    output.close()
    return Response(content, mimetype='text/plain')
Example #22
 def serialize(self, format='csv'):
     """
     Returns the CSV representation of the whole aggregation.
     """
     utf8_recoder = lambda s: s.encode('utf-8') if isinstance(s, unicode) \
         else s  # helper for utf-8 encoding
     def getter(obj, atr):
         if '/' in atr:
             d, k = atr.split('/')
             if getattr(obj, d, None):
                 return getattr(obj, d, {}).get(k, None)
             else:
                 return None
         else:
             return getattr(obj, atr, None)
     s = sio()  # string IO buffer
     w = csv_writer(s)
     # column headers
     cols = ['id', 'uri']
     cols.extend(sorted(self.cols))
     w.writerow(cols)
     # column values
     for obj in self.aggregator:
         w.writerow(map(
             utf8_recoder,  # csv_writer does not write unicode
             (getter(obj, atr) for atr in cols)
             ))
     r = s.getvalue()
     s.close()
     return r
Example #23
    def send_xml(self, xml_content):
        """ Method used to send a given xml file to the switches
        """

        # set url being used
        url = self._create_url()

        ziped = sio()
        with gzip.GzipFile(fileobj=ziped, mode='w') as gzip_file:
            gzip_file.write(xml_content)

        run_data = ziped.getvalue()
        ziped.close()

        fields = (('page', 'file_upload'),
                  ('running_part', '1'),
                  ('file_to_upload', ('file_to_upload',
                                      run_data,
                                      'application/octet-stream')))
        m = MultipartEncoder(fields=fields, boundary='-----boundary-----')
        r = requests.post(url=url,
                          data=m,
                          auth=self.auth,
                          headers={'Content-type': m.content_type},
                          verify=False)

        print r.text
Example #24
 def test_rowset_as_schema(self):
     from StringIO import StringIO as sio
     ts = CSVTableSet.from_fileobj(sio('''name,dob\nmk,2012-01-02\n'''))
     rs = ts.tables[0]
     jts = rowset_as_jts(rs).as_dict()
     assert_equal(jts['fields'], [{'type': 'string', 'id': u'name', 'label': u'name'},
                                  {'type': 'date', 'id': u'dob', 'label': u'dob'}])
Example #25
 def test_rowset_as_schema(self):
     from StringIO import StringIO as sio
     ts = CSVTableSet(sio('''name,dob\nmk,2012-01-02\n'''))
     rs = ts.tables[0]
     jts = rowset_as_jts(rs).as_dict()
     assert_equal(jts['fields'], [
         {'type': 'string', 'id': u'name', 'label': u'name'},
         {'type': 'date', 'id': u'dob', 'label': u'dob'}])
Example #26
 def csv_file(partitionNumber, files):
     file_count = 0
     for filename, contents in files:
         # Only skip lines on the first file
         if partitionNumber == 0 and file_count == 0 and _skiprows > 0:
             yield pandas.read_csv(sio(contents),
                                   *args,
                                   header=None,
                                   names=mynames,
                                   skiprows=_skiprows,
                                   **kwargs)
         else:
             file_count += 1
             yield pandas.read_csv(sio(contents),
                                   *args,
                                   header=None,
                                   names=mynames,
                                   **kwargs)
Example #27
    def test_readGraph_kthlist_bipartite(self):

        G = readGraph(sio(kthlist_bipartite),
                      graph_type='bipartite',
                      file_format='kthlist')
        self.assertEqual(G.order(), 5)
        L, R = bipartite_sets(G)
        self.assertEqual(len(L), 2)
        self.assertEqual(len(R), 3)
Example #28
    def test_low_level_gml_read_path2(self):

        G = nx.read_gml(sio(gml_path2))

        self.assertEqual(G.order(), 3)
        self.assertEqual(len(G.edges()), 2)
        self.assertTrue(G.has_edge(0, 1))
        self.assertTrue(G.has_edge(1, 2))
        self.assertFalse(G.has_edge(0, 2))
Example #29
    def test_low_level_dimacs_read_path2(self):

        G = cnfformula.graphs._read_graph_dimacs_format(sio(dimacs_path2))

        self.assertEqual(G.order(), 3)
        self.assertEqual(len(G.edges()), 2)
        self.assertTrue(G.has_edge(1, 2))
        self.assertTrue(G.has_edge(2, 3))
        self.assertFalse(G.has_edge(1, 3))
Example #30
def post():
    output = sio()
    output.write('POST\n')
    output.write('form: {}\n'.format(request.form))
    output.write('data: {}\n'.format(request.data))
    output.write('files: {}\n'.format(request.files))
    content = output.getvalue()
    output.close()
    return Response(content, mimetype='text/plain')
Example #31
    def test_low_level_dimacs_read_path2(self):

        G = cnfformula.graphs._read_graph_dimacs_format(sio(dimacs_path2))

        self.assertEqual(G.order(), 3)
        self.assertEqual(len(G.edges()), 2)
        self.assertTrue(G.has_edge(1, 2))
        self.assertTrue(G.has_edge(2, 3))
        self.assertFalse(G.has_edge(1, 3))
Example #32
    def test_low_level_gml_read_path2(self):

        G = nx.read_gml(sio(gml_path2))

        self.assertEqual(G.order(), 3)
        self.assertEqual(len(G.edges()), 2)
        self.assertTrue(G.has_edge(0, 1))
        self.assertTrue(G.has_edge(1, 2))
        self.assertFalse(G.has_edge(0, 2))
Example #33
 def csv_file(partition_number, files):
     # pylint: disable=unexpected-keyword-arg
     file_count = 0
     for _, contents in files:
         # Only skip lines on the first file
         if partition_number == 0 and file_count == 0 and _skiprows > 0:
             yield pandas.read_csv(
                 sio(contents), *args,
                 header=None,
                 names=mynames,
                 skiprows=_skiprows,
                 **kwargs)
         else:
             file_count += 1
             yield pandas.read_csv(
                 sio(contents), *args,
                 header=None,
                 names=mynames,
                 **kwargs)
Example #34
 def csv_file(partition_number, files):
     # pylint: disable=unexpected-keyword-arg
     file_count = 0
     for _, contents in files:
         # Only skip lines on the first file
         if partition_number == 0 and file_count == 0 and _skiprows > 0:
             yield pandas.read_csv(
                 sio(contents), *args,
                 header=None,
                 names=mynames,
                 skiprows=_skiprows,
                 **kwargs)
         else:
             file_count += 1
             yield pandas.read_csv(
                 sio(contents), *args,
                 header=None,
                 names=mynames,
                 **kwargs)
Example #35
    def test_rowset_as_schema(self):
        from StringIO import StringIO as sio

        ts = CSVTableSet(sio("""name,dob\nmk,2012-01-02\n"""))
        rs = ts.tables[0]
        jts = rowset_as_jts(rs).as_dict()
        assert_equal(
            jts["fields"],
            [{"type": "string", "id": u"name", "label": u"name"}, {"type": "date", "id": u"dob", "label": u"dob"}],
        )
Example #36
def decode_hex_string( st, n ):
    """Takes hexadecimal string, containing numerator and denominator, separated by single space, and returns first n decoded hexadecimal digits
    """
    num, den = [ int(s, 16) for s in st.split(" ") ]
    s = sio()
    for i in xrange(n):
        num = num*16
        d = num / den
        num = num % den
        s.write("%x"%d)
    return s.getvalue()
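
Two worked checks, assuming Python 2 semantics as in the snippet (`xrange`, integer `/`): 1/3 is 0x0.5555... and 1/2 is 0x0.8, so:

    assert decode_hex_string("1 3", 5) == "55555"
    assert decode_hex_string("1 2", 4) == "8000"
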
Example #37
def long_to_bytes( n, nbytes=None ):
    """Convert long value to string of bytes, either minimal or of given length """
    odata = sio()
    written = 0
    while n > 0:
        odata.write(chr(n & 0xff))
        written += 1
        n >>= 8
    if nbytes is not None:
        for i in xrange(nbytes-written):
            odata.write('\x00')
    return odata.getvalue()[::-1]
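
A few spot checks (Python 2 byte strings assumed): bytes are emitted least-significant first and reversed at the end, and nbytes left-pads with NULs.

    assert long_to_bytes(0x1234) == '\x12\x34'
    assert long_to_bytes(1, nbytes=4) == '\x00\x00\x00\x01'
    assert long_to_bytes(0) == ''   # n == 0 writes nothing
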
Example #38
 def csv_rows(partitionNumber, rows):
     rowCount = 0
     inputStr = "\n".join(rows)
     if partitionNumber == 0:
         return iter([
             pandas.read_csv(sio(inputStr),
                             *args,
                             header=None,
                             names=mynames,
                             skiprows=_skiprows,
                             **kwargs)
         ])
     else:
         # could use .iterrows() instead?
         return iter([
             pandas.read_csv(sio(inputStr),
                             *args,
                             header=None,
                             names=mynames,
                             **kwargs)
         ])
Example #39
    def test_send_xml(self):

        test_url = '1.1.1.1'
        test_auth = ('user', 'pass')
        test_method = 'https'

        test_rpc = rpc.RPC(test_auth[0], test_auth[1], test_url, test_method)

        rpc.get = mock.Mock()
        cfg = ds.Cfg_data()

        vlan = ds.Vlan_global(42, name="vlan_test", ports=ds.Pbits([1, 3, 4]))

        cfg.vlans.append(vlan)

        test_rpc.send_xml(cfg.as_xml_text())

        # mock_calls returns the calls executed to this method, the 0 means we
        # are getting the first (and only) call, and the 2 means we are getting
        # the keyword parameters.
        parameters = requests.post.mock_calls[0][2]

        # retrieve url
        received_url = parameters['url']

        expected_url = 'https://1.1.1.1/System/File/file_config.html'

        self.assertEquals(expected_url, received_url)

        # retrieve data from the parameters.
        data = parameters['data']

        expected_xml = '<cfg_data><vlan_global id0="42"><vid>42</vid>' + \
                       '<active>1</active><name>vlan_test</name>' + \
                       '<pbmp_untagged id0="0"><pbits id0="0">13</pbits>' + \
                       '</pbmp_untagged></vlan_global></cfg_data>'

        # Since the XML has to be the last parameter to be passed, we have to
        # get the last field.
        zippedXML = data.fields[-1][-1][-2]

        # decompress to do the comparison.
        # get the file with the zipped xml as content
        zipFileObject = sio(zippedXML)

        # get the actual file (from which we read)
        with gzip.GzipFile(fileobj=zipFileObject, mode='r') as zipFile:
            received_xml = zipFile.read()

        zipFileObject.close()

        self.assertEquals(expected_xml, received_xml)
Example #40
    def test_send_xml(self):

        test_url = '1.1.1.1'
        test_auth = ('user', 'pass')
        test_method = 'https'

        test_rpc = rpc.RPC(test_auth[0], test_auth[1], test_url, test_method)

        rpc.get = mock.Mock()
        cfg = ds.CfgData()

        vlan = ds.VlanGlobal(42, name="vlan_test", ports=ds.Pbits([1, 3, 4]))

        cfg.vlans.append(vlan)

        test_rpc.send_xml(cfg.as_xml_text())

        # mock_calls returns the calls executed to this method, the 0 means we
        # are getting the first (and only) call, and the 2 means we are getting
        # the keyword parameters.
        parameters = requests.post.mock_calls[0][2]

        # retrieve url
        received_url = parameters['url']

        expected_url = 'https://1.1.1.1/System/File/file_config.html'

        self.assertEquals(expected_url, received_url)

        # retrieve data from the parameters.
        data = parameters['data']

        expected_xml = '<cfg_data><vlan_global id0="42"><vid>42</vid>' + \
                       '<active>1</active><name>vlan_test</name>' + \
                       '<pbmp_untagged id0="0"><pbits id0="0">13</pbits>' + \
                       '</pbmp_untagged></vlan_global></cfg_data>'

        # Since the XML has to be the last parameter to be passed, we have to
        # get the last field.
        zippedXML = data.fields[-1][-1][-2]

        # decompress to do the comparison.
        # get the file with the zipped xml as content
        zipFileObject = sio(zippedXML)

        # get the actual file (from which we read)
        with gzip.GzipFile(fileobj=zipFileObject, mode='r') as zipFile:
            received_xml = zipFile.read()

        zipFileObject.close()

        self.assertEquals(expected_xml, received_xml)
Example #41
 def close(self):
     super(XMLAggregator, self).close()
     str_buffer = sio()
     next_url = self.dataset_split.get('next_url', '')
      # aggregating structure
     writer = structwriter(stream=str_buffer, indent=True)
     feed = writer.feed(
         ROOT(
             E(self.name,
                 ({'total_registros': self.total_registros}
                     if self.total_registros else {}),
                  (E(self.element_name(obj),
                     self.element_atrs(obj),
                     (self.element(obj, atr)
                      for atr in getattr(obj, self.atributo_serializar)
                      if (getattr(obj, atr)
                          or isinstance(getattr(obj, atr), int))))
                   for obj in self.aggregator),
                  E('proximos', {'href': next_url})
                      if next_url else tuple(),
             )
         )
     )
     r = str_buffer.getvalue()
     str_buffer.close()
     self.serialization = r
Example #42
#!/usr/bin/env python
import messytables_jts
import messytables
from StringIO import StringIO as sio
ts = messytables.CSVTableSet.from_fileobj(sio('''name,dob\nmk,2012-01-02\n'''))
rs = ts.tables[0]
print messytables_jts.rowset_as_schema(rs).as_json()
Example #43
        for i,j in di.iteritems():
            item = dom.createElement(str(i))
            if isinstance(j, list) or isinstance(j, tuple):
                do_list(item, j)
            elif isinstance(j, dict):
                do_dict(item, j)
            else:
                do_else(item, j)
            node.appendChild(item)

    def do_else(node, obj):
        node.appendChild(dom.createTextNode(unicode(obj)))
        attr = ''
        if isinstance(obj, unicode) or isinstance(obj, str):
            attr = 'str'
        elif isinstance(obj, bool):
            attr = 'bool'
        elif isinstance(obj, int) or isinstance(obj, long):
            attr = 'int'
        node.setAttribute('type',attr)
         
    impl = minidom.getDOMImplementation()
    dom = impl.createDocument(None, "_BASE", None)                   
    result= dom.documentElement
    do_dict(result, obj)
    return result

json_to_object = lambda (j):json.load(sio(j))
object_to_json = lambda (obj):json.dumps(obj)
json_to_xml = lambda (j):object_to_xml(json_to_object(j))
xml_to_json = lambda (xml):object_to_json(xml_to_object(xml))
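
A hedged round-trip check for the conversion lambdas (Python 2 syntax as in the snippet; `xml_to_object` is defined elsewhere in the module, so only the JSON pair is exercised here):

    assert json_to_object('{"a": 1}') == {'a': 1}
    assert object_to_json({'a': 1}) == '{"a": 1}'
    assert json_to_object(object_to_json({'a': 1})) == {'a': 1}
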
Example #44
    def read_csv(self, file_path, use_whole_file=False, names=None, skiprows=0,
                 *args, **kwargs):
        """Read a CSV file in and parse it into Pandas DataFrames. By default,
        the first row from the first partition of that data is parsed and used
        as the column names for the data from. If no 'names' param is
        provided we parse the first row of the first partition of data and
        use it for column names.

        Parameters
        ----------
        file_path: string
            Path to input. Any valid file path in Spark works here, eg:
            'file:///my/path/in/local/file/system' or 'hdfs:/user/juliet/'
        use_whole_file: boolean
            Whether or not to use the whole file.
        names: list of strings, optional
        skiprows: integer, optional
            indicates how many rows of input to skip. This will
            only be applied to the first partition of the data (so if
            #skiprows > #row in first partition this will not work). Generally
            this shouldn't be an issue for small values of skiprows.
        No other value of header is supported.
        All additional parameters available in pandas.read_csv() are usable
        here.

        Returns
        -------
        A SparklingPandas DataFrame that contains the data from the
        specified file.
        """
        def csv_file(partition_number, files):
            # pylint: disable=unexpected-keyword-arg
            file_count = 0
            for _, contents in files:
                # Only skip lines on the first file
                if partition_number == 0 and file_count == 0 and _skiprows > 0:
                    yield pandas.read_csv(
                        sio(contents), *args,
                        header=None,
                        names=mynames,
                        skiprows=_skiprows,
                        **kwargs)
                else:
                    file_count += 1
                    yield pandas.read_csv(
                        sio(contents), *args,
                        header=None,
                        names=mynames,
                        **kwargs)

        def csv_rows(partition_number, rows):
            # pylint: disable=unexpected-keyword-arg
            in_str = "\n".join(rows)
            if partition_number == 0:
                return iter([
                    pandas.read_csv(
                        sio(in_str), *args, header=None,
                        names=mynames,
                        skiprows=_skiprows,
                        **kwargs)])
            else:
                # could use .iterrows() instead?
                return iter([pandas.read_csv(sio(in_str), *args, header=None,
                                             names=mynames, **kwargs)])

        # If we need to peek at the first partition to determine the column
        # names
        mynames = None
        _skiprows = skiprows
        if names:
            mynames = names
        else:
            # In the future we could avoid this expensive call.
            first_line = self.spark_ctx.textFile(file_path).first()
            frame = pandas.read_csv(sio(first_line), **kwargs)
            # pylint sees frame as a tuple despite it being a DataFrame
            mynames = list(frame.columns)
            _skiprows += 1

        # Do the actual load
        if use_whole_file:
            return self.from_pandas_rdd(
                self.spark_ctx.wholeTextFiles(file_path)
                .mapPartitionsWithIndex(csv_file))
        else:
            return self.from_pandas_rdd(
                self.spark_ctx.textFile(file_path)
                    .mapPartitionsWithIndex(csv_rows))
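
A hedged call sketch for the loader above (the context object name `psc` is hypothetical; the path style and the names/skiprows semantics come from the docstring):

    # hypothetical SparklingPandas context object `psc`
    df = psc.read_csv('hdfs:/user/juliet/data.csv',
                      names=['a', 'b', 'c'],  # skip the header-sniffing pass
                      skiprows=1)             # applied to the first partition only
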
Example #45
 def json_file_to_df(files):
     """ Transforms a JSON file into a list of data"""
     for _, contents in files:
         yield pandas.read_json(sio(contents), *args, **kwargs)
Example #46
    def test_low_level_dot_read_path2(self):

        if not has_dot_library():
            self.skipTest("DOT library not installed. Can't test DOT I/O")

        G = nx.Graph(find_read_dot()(sio(dot_path2)))
Example #47
    def read_csv(self, name, use_whole_file=False, names=None, skiprows=0,
                 *args, **kwargs):
        """Read a CSV file in and parse it into Pandas DataFrames.
        If no names is provided we use the first row for the names.
        header=0 is the default unless names is provided in which case
        header=None is the default.
        skiprows indicates how many rows of input to skip. This will
        only be applied to the first partition of the data (so if
        #skiprows > #row in first partition this will not work). Generally
        this shouldn't be an issue for small values of skiprows.
        No other value of header is supported.
        All additional parameters are passed to the read_csv function.
        """
        def csv_file(partitionNumber, files):
            file_count = 0
            for filename, contents in files:
                # Only skip lines on the first file
                if partitionNumber == 0 and file_count == 0 and _skiprows > 0:
                    yield pandas.read_csv(sio(contents), *args,
                                          header=None,
                                          names=mynames,
                                          skiprows=_skiprows, **kwargs)
                else:
                    file_count += 1
                    yield pandas.read_csv(sio(contents), *args,
                                          header=None,
                                          names=mynames,
                                          **kwargs)

        def csv_rows(partitionNumber, rows):
            rowCount = 0
            inputStr = "\n".join(rows)
            if partitionNumber == 0:
                return iter([pandas.read_csv(sio(inputStr), *args, header=None,
                                             names=mynames, skiprows=_skiprows,
                                             **kwargs)])
            else:
                # could use .iterrows() instead?
                return iter([pandas.read_csv(sio(inputStr), *args, header=None,
                                             names=mynames, **kwargs)])

        # If we need to peek at the first partition to determine the column
        # names
        mynames = None
        _skiprows = skiprows
        if names:
            mynames = names
        else:
            # In the future we could avoid this expensive call.
            first_line = self.sc.textFile(name).first()
            frame = pandas.read_csv(sio(first_line), **kwargs)
            mynames = list(frame.columns.values)
            _skiprows += 1

        # Do the actual load
        if use_whole_file:
            return PRDD.fromRDD(
                self.sc.wholeTextFiles(name).mapPartitionsWithIndex(csv_file))
        else:
            return PRDD.fromRDD(
                self.sc.textFile(name).mapPartitionsWithIndex(csv_rows))
Example #48
    def test_readGraph_dimacs_path2(self):

        self.assertRaises(ValueError, readGraph, sio(dimacs_path2), graph_type="simple")
        G = readGraph(sio(dimacs_path2), graph_type="simple", file_format="dimacs")
        self.assertEqual(G.order(), 3)
        self.assertEqual(len(G.edges()), 2)
Example #49
            item = dom.createElement(str(i))
            if isinstance(j, list) or isinstance(j, tuple):
                do_list(item, j)
            elif isinstance(j, dict):
                do_dict(item, j)
            else:
                do_else(item, j)
            node.appendChild(item)

    def do_else(node, obj):
        node.appendChild(dom.createTextNode(unicode(obj)))
        attr = ''
        if isinstance(obj, unicode) or isinstance(obj, str):
            attr = 'str'
        elif isinstance(obj, bool):
            attr = 'bool'
        elif isinstance(obj, int) or isinstance(obj, long):
            attr = 'int'
        node.setAttribute('type', attr)

    impl = minidom.getDOMImplementation()
    dom = impl.createDocument(None, "_BASE", None)
    result = dom.documentElement
    do_dict(result, obj)
    return result


json_to_object = lambda (j): json.load(sio(j))
object_to_json = lambda (obj): json.dumps(obj)
json_to_xml = lambda (j): object_to_xml(json_to_object(j))
xml_to_json = lambda (xml): object_to_json(xml_to_object(xml))
Example #50
    def read_csv(self,
                 name,
                 use_whole_file=False,
                 names=None,
                 skiprows=0,
                 *args,
                 **kwargs):
        """Read a CSV file in and parse it into Pandas DataFrames.
        If no names is provided we use the first row for the names.
        header=0 is the default unless names is provided in which case
        header=None is the default.
        skiprows indicates how many rows of input to skip. This will
        only be applied to the first partition of the data (so if
        #skiprows > #row in first partition this will not work). Generally
        this shouldn't be an issue for small values of skiprows.
        No other value of header is supported.
        All additional parameters are passed to the read_csv function.
        """
        def csv_file(partitionNumber, files):
            file_count = 0
            for filename, contents in files:
                # Only skip lines on the first file
                if partitionNumber == 0 and file_count == 0 and _skiprows > 0:
                    yield pandas.read_csv(sio(contents),
                                          *args,
                                          header=None,
                                          names=mynames,
                                          skiprows=_skiprows,
                                          **kwargs)
                else:
                    file_count += 1
                    yield pandas.read_csv(sio(contents),
                                          *args,
                                          header=None,
                                          names=mynames,
                                          **kwargs)

        def csv_rows(partitionNumber, rows):
            rowCount = 0
            inputStr = "\n".join(rows)
            if partitionNumber == 0:
                return iter([
                    pandas.read_csv(sio(inputStr),
                                    *args,
                                    header=None,
                                    names=mynames,
                                    skiprows=_skiprows,
                                    **kwargs)
                ])
            else:
                # could use .iterrows() instead?
                return iter([
                    pandas.read_csv(sio(inputStr),
                                    *args,
                                    header=None,
                                    names=mynames,
                                    **kwargs)
                ])

        # If we need to peek at the first partition to determine the column
        # names
        mynames = None
        _skiprows = skiprows
        if names:
            mynames = names
        else:
            # In the future we could avoid this expensive call.
            first_line = self.sc.textFile(name).first()
            frame = pandas.read_csv(sio(first_line), **kwargs)
            mynames = list(frame.columns.values)
            _skiprows += 1

        # Do the actual load
        if use_whole_file:
            return PRDD.fromRDD(
                self.sc.wholeTextFiles(name).mapPartitionsWithIndex(csv_file))
        else:
            return PRDD.fromRDD(
                self.sc.textFile(name).mapPartitionsWithIndex(csv_rows))
Example #51
 def json_file(partitionNumber, files):
     for filename, contents in files:
         yield pandas.read_json(sio(contents), *args, **kwargs)
Example #52
    def test_low_level_dot_read_path2(self):

        if not has_dot_library():
            self.skipTest("DOT library not installed. Can't test DOT I/O")

        G = nx.Graph(find_read_dot()(sio(dot_path2)))
Example #53
 def __init__(self, s):
     self.fd = sio(s)
     self.last = None
Example #54
    def read_csv(self, file_path, use_whole_file=False, names=None, skiprows=0,
                 *args, **kwargs):
        """Read a CSV file in and parse it into Pandas DataFrames. By default,
        the first row from the first partition of that data is parsed and used
        as the column names for the data from. If no 'names' param is
        provided we parse the first row of the first partition of data and
        use it for column names.

        Parameters
        ----------
        file_path: string
            Path to input. Any valid file path in Spark works here, eg:
            'file:///my/path/in/local/file/system' or 'hdfs:/user/juliet/'
        use_whole_file: boolean
            Whether or not to use the whole file.
        names: list of strings, optional
        skiprows: integer, optional
            indicates how many rows of input to skip. This will
            only be applied to the first partition of the data (so if
            #skiprows > #row in first partition this will not work). Generally
            this shouldn't be an issue for small values of skiprows.
        No other value of header is supported.
        All additional parameters available in pandas.read_csv() are usable
        here.

        Returns
        -------
        A SparklingPandas DataFrame that contains the data from the
        specified file.
        """
        def csv_file(partition_number, files):
            # pylint: disable=unexpected-keyword-arg
            file_count = 0
            for _, contents in files:
                # Only skip lines on the first file
                if partition_number == 0 and file_count == 0 and _skiprows > 0:
                    yield pandas.read_csv(
                        sio(contents), *args,
                        header=None,
                        names=mynames,
                        skiprows=_skiprows,
                        **kwargs)
                else:
                    file_count += 1
                    yield pandas.read_csv(
                        sio(contents), *args,
                        header=None,
                        names=mynames,
                        **kwargs)

        def csv_rows(partition_number, rows):
            # pylint: disable=unexpected-keyword-arg
            in_str = "\n".join(rows)
            if partition_number == 0:
                return iter([
                    pandas.read_csv(
                        sio(in_str), *args, header=None,
                        names=mynames,
                        skiprows=_skiprows,
                        **kwargs)])
            else:
                # could use .iterrows() instead?
                return iter([pandas.read_csv(sio(in_str), *args, header=None,
                                             names=mynames, **kwargs)])

        # If we need to peek at the first partition to determine the column
        # names
        mynames = None
        _skiprows = skiprows
        if names:
            mynames = names
        else:
            # In the future we could avoid this expensive call.
            first_line = self.spark_ctx.textFile(file_path).first()
            frame = pandas.read_csv(sio(first_line), **kwargs)
            # pylint sees frame as a tuple despite it being a DataFrame
            mynames = list(frame.columns)
            _skiprows += 1

        # Do the actual load
        if use_whole_file:
            return self.from_pandas_rdd(
                self.spark_ctx.wholeTextFiles(file_path)
                .mapPartitionsWithIndex(csv_file))
        else:
            return self.from_pandas_rdd(
                self.spark_ctx.textFile(file_path)
                    .mapPartitionsWithIndex(csv_rows))
Example #55
def parse_state_xml(state_xml_files):
    # TODO: Make this work for both primary and general elections.
    # Primaries have two files, presidential and district:
    # urls = ('X12DP', 'X12PP')
    """Parse the state XML and insert it into the database"""

    errmsg = "Not all file paths are available. Did you pass in a list?"
    assert all(os.path.exists(path) for path in state_xml_files), errmsg

    affiliations = {'Democratic': 'DEM',
                    'Republican': 'REP',
                    'American Independent': 'AIP',
                    'Green': 'GRN',
                    'Libertarian': 'LIB',
                    'Peace and Freedom': 'P-F',
                    'Independent': 'IND',
                    'Non-Partisan': '',
                    'No Party Preference': ''}
    
    for xml_file in state_xml_files:

        # I was having encoding problems, so I introduced the chunk below.
        # Mileage may vary; this may not be necessary in all cases.
        with open(xml_file) as f:
            raw_text = f.read()
        xml_file = sio(raw_text.decode('cp1252').encode('utf8'))

        tree = ET.parse(xml_file)
        root = tree.getroot()
        if root is not None:
            contests = root.find('Count').find(
                'Election').find(
                    'Contests').getiterator('Contest')

            for contest in contests:
                contest_params = {}
                candidate_params = {}
                # Get the CountMetric data
                cm_list = contest.find('TotalVotes').findall('CountMetric')
                count_metrics = { cm.attrib['Id'] : cm.text 
                                  for cm in cm_list }

                # for cm in cm_list:
                #     count_metrics[cm.attrib['Id']] = cm.text
                # See if the contest already exists
                try:
                    c = StateContest.objects.get(
                        contest_identifier=contest.find('ContestIdentifier').attrib['Id'])
                    # print "Updating existing contest %s" % contest.find('ContestIdentifier').attrib['Id']
                    c.name = contest.find('ContestIdentifier').find(
                        'ContestName').text
                    c.precincts_reporting = count_metrics.get('PR', 0)
                    c.total_precincts = count_metrics.get('TP', 0)
                    c.pct_yes_votes = count_metrics.get('PYV', 0)
                    c.pct_no_votes = count_metrics.get('PNV', 0)
                    c.save()
                except StateContest.DoesNotExist:
                    # print "Creating new contest %s" % contest.find('ContestIdentifier').attrib['Id']
                    contest_params['contest_identifier'] = contest.find(
                        'ContestIdentifier').attrib['Id']
                    contest_params['contest_name'] = contest.find(
                        'ContestIdentifier').find('ContestName').text
                    contest_params['precincts_reporting'] = count_metrics.get('PR', 0)
                    contest_params['total_precincts'] = count_metrics.get('TP', 0)
                    contest_params['pct_yes_votes'] = count_metrics.get('PYV', 0)
                    contest_params['pct_no_votes'] = count_metrics.get('PNV', 0)
                    c = StateContest(**contest_params)
                    c.save()
                contest_id = c.id
                candidate_params['state_contest_id'] = contest_id
                for selection in contest.find('TotalVotes').findall('Selection'):
                    sel_count_metrics = {}
                    sel_cm_list = selection.findall('CountMetric')
                    for sel_cm in sel_cm_list:
                        sel_count_metrics[sel_cm.attrib['Id']] = sel_cm.text
                    try:
                        candidate_identifier = selection.find('Candidate').find('CandidateIdentifier').attrib['Id']
                    except (AttributeError, KeyError):
                        candidate_identifier = 0
                    try:
                        proposal_identifier = selection.find('Candidate').find('ProposalItem').attrib['ProposalIdentifier']
                    except (AttributeError, KeyError):
                        proposal_identifier = ''
                    try:
                        referendum_option_identifier = selection.find('Candidate').find('ProposalItem').attrib['ReferendumOptionIdentifier']
                    except (AttributeError, KeyError):
                        referendum_option_identifier = ''
                    try:
                        sel = StateCandidate.objects.get(
                            (Q(candidate_identifier=candidate_identifier) &
                             Q(state_contest__contest_identifier=contest.find(
                                 'ContestIdentifier').attrib['Id'])) |
                            (Q(proposal_identifier=proposal_identifier) &
                             Q(referendum_option_identifier=referendum_option_identifier)))

                        if candidate_identifier != 0:
                            # print "Updating existing candidate %s" % candidate_identifier
                            #This is a candidate
                            sel.candidate_name = selection.find('Candidate').find('CandidateIdentifier').find('CandidateName').text
                            sel.candidate_identifier = selection.find('Candidate').find('CandidateIdentifier').attrib['Id']
                            affiliation = selection.find('Candidate').find('Affiliation').find('Type').text
                            sel.affiliation = affiliations[affiliation]
                            sel.valid_votes = selection.find('ValidVotes').text
                            sel.pct_votes_party = sel_count_metrics.get('PVP', 0)
                            sel.pct_votes_race = sel_count_metrics.get('PVR', 0)
                        elif proposal_identifier != 0:
                            #This is a proposal or judge
                            if selection.find('Candidate').find('ProposalItem').attrib['ReferendumOptionIdentifier'] == 'Yes':
                                sel.referendum_option_identifier = 'Yes'
                                sel.valid_votes = selection.find('ValidVotes').text
                                sel.pct_votes_race = count_metrics.get('PYV', 0)
                            else:
                                sel.referendum_option_identifier = 'No'
                                sel.valid_votes = selection.find('ValidVotes').text
                                sel.pct_votes_race = count_metrics.get('PNV', 0)
                        sel.save()
                    except StateCandidate.DoesNotExist:
                        # print "Adding new candidate %s" % candidate_identifier
                        if candidate_identifier != 0:
                            candidate_params['candidate_name'] = selection.find('Candidate').find('CandidateIdentifier').find('CandidateName').text
                            candidate_params['candidate_identifier'] = selection.find('Candidate').find('CandidateIdentifier').attrib['Id']
                            affiliation = selection.find('Candidate').find('Affiliation').find('Type').text
                            candidate_params['affiliation'] = affiliations[affiliation]
                            candidate_params['valid_votes'] = selection.find('ValidVotes').text
                            candidate_params['pct_votes_party'] = sel_count_metrics.get('PVP', 0)
                            candidate_params['pct_votes_race'] = sel_count_metrics.get('PVR', 0)
                        else:
                            candidate_params['proposal_identifier'] = selection.find('Candidate').find('ProposalItem').attrib['ProposalIdentifier']
                            if selection.find('Candidate').find('ProposalItem').attrib['ReferendumOptionIdentifier'] == 'Yes':
                                candidate_params['referendum_option_identifier'] = 'Yes'
                                candidate_params['valid_votes'] = selection.find('ValidVotes').text
                            else:
                                candidate_params['referendum_option_identifier'] = 'No'
                                candidate_params['valid_votes'] = selection.find('ValidVotes').text
                        sel = StateCandidate(**candidate_params)
                        # sel.StateContest = c
                        sel.save()
Example #56
 def json_file_to_df(files):
     """ Transforms a JSON file into a list of data"""
     for _, contents in files:
         yield pandas.read_json(sio(contents), *args, **kwargs)