def parse_xml(file_name, size=0, method="json"):
    """Parse single XML file into the separated XML instances.

    The file is split into chunks by ``xml_documents``; every chunk is fed
    to a SAX parser whose content handler is a ``PatentHandler``, and a deep
    copy of the handler's ``serialization()`` result is collected per chunk.

    Args:
        :param file_name(str): Path of the file to read (passed to ``open``).
        :param size(int): Number of XML instances within XML file to be read.
            Only consulted when the module-level ``MAX_NUMBER_OF_PATENTS``
            is falsy.  The default ``0`` never equals the running count, so
            every instance is processed.
        :param method(str): Export method(Default:Json).  Only ``"json"``
            triggers an export (through ``export2json``); with any other
            value the collected results are not exported.

    Returns:
    int.  The return code::

             0 -- Success!

    Raises:
        IOError: not handled here; an I/O failure propagates to the caller
            unchanged, with its original traceback.
    """
    # Set initial values
    count = 0
    results = []
    # add benchmark
    bm = BenchMark()
    # create an XML Reader
    parser = xml.sax.make_parser()
    # turn off namespaces
    parser.setFeature(xml.sax.handler.feature_namespaces, 0)
    # do not include external general entities while parsing
    parser.setFeature(xml.sax.handler.feature_external_ges, False)
    # override the default Context Handler
    xml_patent_handler = PatentHandler()
    parser.setContentHandler(xml_patent_handler)

    # No try/except around the I/O: the previous handler only did
    # ``raise e``, which on Python 2 discards the original traceback.
    with open(file_name) as citation:
        bm.toggleOn('Start processing [ ]')
        for xml_part in xml_documents(citation):
            # Cast string back to file-like object to parse
            parser.parse(cStringIO.StringIO(xml_part))
            # Deep copy so the later reset() cannot alter the stored result
            results.append(
                copy.deepcopy(xml_patent_handler.serialization()))
            count += 1
            # MAX_NUMBER_OF_PATENTS is a module-level flag that is only
            # read here; when it is truthy the size limit is ignored.
            if not MAX_NUMBER_OF_PATENTS and count == int(size):
                break
            # Clean up stack after processing one xml paragraph
            xml_patent_handler.reset()
            bm.add(0)

    bm.toggleOff(' \bOK] - ' + str(count) + ' patents ')
    if method == "json":
        export2json(results)
    return 0
def parse_xml(file_name, size=0, method="json"):
    """Parse single XML file into the separated XML instances.

    ``xml_documents`` splits the opened file into chunks.  Each chunk is
    parsed with a SAX reader driving a ``PatentHandler``, and a deep copy of
    the handler's ``serialization()`` output is appended to the result list.

    Args:
        :param file_name(str): Path of the file to read (passed to ``open``).
        :param size(int): Number of XML instances within XML file to be read.
            Ignored when the module-level ``MAX_NUMBER_OF_PATENTS`` is
            truthy.  With the default ``0`` the running count never matches,
            so all instances are read.
        :param method(str): Export method(Default:Json).  ``"json"`` hands
            the results to ``export2json``; any other value skips the
            export.

    Returns:
    int.  The return code::

             0 -- Success!

    Raises:
        IOError: not caught in this function; it reaches the caller as
            raised, keeping the traceback of the failing call.
    """
    # Set initial values
    count = 0
    results = []
    # add benchmark
    bm = BenchMark()
    # create an XML Reader
    parser = xml.sax.make_parser()
    # turn off namespaces
    parser.setFeature(xml.sax.handler.feature_namespaces, 0)
    # do not include external general entities while parsing
    parser.setFeature(xml.sax.handler.feature_external_ges, False)
    # override the default Context Handler
    xml_patent_handler = PatentHandler()
    parser.setContentHandler(xml_patent_handler)

    # The former ``except IOError as e: raise e`` wrapper was removed: it
    # changed nothing for callers except truncating the traceback on
    # Python 2.
    with open(file_name) as citation:
        bm.toggleOn('Start processing [ ]')
        for xml_part in xml_documents(citation):
            # Cast string back to file-like object to parse
            parser.parse(cStringIO.StringIO(xml_part))
            # Store a deep copy; the handler is reset and reused below
            results.append(
                copy.deepcopy(xml_patent_handler.serialization()))
            count += 1
            # The size limit only applies while the module-level
            # MAX_NUMBER_OF_PATENTS flag (read-only here) is falsy.
            if not MAX_NUMBER_OF_PATENTS and count == int(size):
                break
            # Clean up stack after processing one xml paragraph
            xml_patent_handler.reset()
            bm.add(0)

    bm.toggleOff(' \bOK] - ' + str(count) + ' patents ')
    if method == "json":
        export2json(results)
    return 0
def export2json(data):
    """Dump ``data`` as indented JSON into the file ``data.json``.

    Args:
        :param data(list): data list to be exported.

    Returns:
    int.  The return code::

             0 -- Success!
    """
    progress = BenchMark()
    progress.toggleOn('Dumping to json [ ]')
    with open('data.json', 'w') as json_file:
        # ensure_ascii=False: non-ASCII characters are written as-is
        # instead of being escaped as \uXXXX sequences.
        json.dump(data, json_file, ensure_ascii=False, indent=4)
        # Completion is reported while the file is still open
        progress.toggleOff('\bOK] - saved to data.json')
    return 0
def export2json(data):
    """Serialize ``data`` to ``data.json`` as JSON with a 4-space indent.

    Args:
        :param data(list): data list to be exported.

    Returns:
    int.  The return code::

             0 -- Success!
    """
    timer = BenchMark()
    timer.toggleOn('Dumping to json [ ]')
    with open('data.json', 'w') as target:
        # Non-ASCII characters are kept verbatim (ensure_ascii=False)
        json.dump(data, target, ensure_ascii=False, indent=4)
        # The benchmark is switched off before the file is closed
        timer.toggleOff('\bOK] - saved to data.json')
    return 0