def write_clusters(xmlcollection, clusters, base_clust_dir, type_='soft',
                   debug_stems_path=None):
    """
    Write every cluster's documents and stems into numbered directories.

    For the n-th cluster (counting from 1) the directory
    ``base_clust_dir + type_ + sep + str(n) + sep`` is created. Each
    document of the cluster is written there under the name returned by its
    ``get_id()``, and the cluster's stems are written, one per line, to a
    file called ``clust_stems`` in the same directory.

    @param xmlcollection: collection of XML documents; must offer
                          ``get_doc(doc_id)``.
    @param clusters: Structure containing the clustered document indices
                     and the stems used for doing so; iterated as
                     ``(docs, clust_stems)`` pairs.
    @param base_clust_dir: Where the base directory for storing clusters
                           reside. Used as a plain string prefix, so it
                           has to end with a path separator.
    @param type_: Default arg is 'soft' indicating our clusters params
                  reflects soft clusters. Other possible argument: 'hard'
                  for hard clustering.
    @param debug_stems_path: Optional file to which the stems of all
                             clusters are additionally appended (debugging
                             aid). Defaults to None, i.e. no extra file.
                             This replaces a hard-coded, developer-specific
                             absolute path that made the function fail on
                             any other machine.
    """
    clust_dir = base_clust_dir + type_ + sep
    enc = get_def_enc()

    for clust_no, (docs, clust_stems) in enumerate(clusters, 1):
        specific_clust_dir = clust_dir + str(clust_no) + sep

        # makedirs() also creates the missing parents (clust_dir included).
        # Best effort as before: report the problem (typically "directory
        # already exists") and carry on.
        try:
            makedirs(specific_clust_dir)
        except OSError as e:
            print(e)

        for doc_id in docs:
            xmldoc = xmlcollection.get_doc(doc_id)
            # "with" closes the file even when the write raises.
            with open(specific_clust_dir + xmldoc.get_id(), "w", enc) as f:
                f.write(xmldoc.get_rawcontent())

        # Single pre-formatted string: prints the same text as the former
        # comma-separated print statement and is valid on Python 2 and 3.
        print("clust_dir:  %s clust_no:  %s  [written]"
              % (clust_dir, clust_no))

        # Materialise the lines once so that clust_stems may be any iterable
        # (even a one-shot iterator) although it can be written twice.
        stem_lines = [str(clust_stem) + "\n" for clust_stem in clust_stems]

        with open(specific_clust_dir + "clust_stems", "w", enc) as f:
            f.writelines(stem_lines)

        if debug_stems_path is not None:
            with open(debug_stems_path, "a", enc) as g:
                g.writelines(stem_lines)
def export_d3_js(G, files_dir=get_web_output_dir(),
                 graphname=get_def_graph_name(), group=None,
                 width=get_webgraph_res()[0], height=get_webgraph_res()[1],
                 node_labels=False, encoding=get_def_enc()):
    """
    A function that exports a NetworkX graph as an interactive D3.js object.

    The function creates ``files_dir`` if necessary, has ``write_d3_js``
    write the graph's formatted JSON to ``files_dir/graphname.json`` and
    writes an HTML page ``files_dir/graphname.html`` to load the graph in a
    browser. The page is the ``d3_html`` template with its asset references
    rewritten to ``d3/d3.js``, ``d3/d3.geom.js``, ``d3/d3.layout.js``,
    ``d3/force.css`` and ``graphname.js``; those assets are not created by
    this function directly.

    Parameters
    ----------
    G : graph
        a NetworkX graph
    files_dir : string, optional
        name of directory to save files
    graphname : string, optional
        the name of the graph being saved as JSON, will appear in directory
        as 'graphname.json'
    group : string, optional
        The name of the 'group' key for each node in the graph. This is used
        to assign nodes to exclusive partitions, and for node coloring if
        visualizing. Passed through to ``write_d3_js``.
    width : int, optional
        width (px) of display frame for graph object in browser window
        (accepted for interface compatibility; not used by this function's
        body)
    height : int, optional
        height (px) of display frame for graph object in browser window
        (accepted for interface compatibility; not used by this function's
        body)
    node_labels : bool, optional
        If true, nodes are displayed with labels in browser (accepted for
        interface compatibility; not used by this function's body)
    encoding: string, optional
        Specify which encoding to use when writing file.

    Examples
    --------
    >>> from scipy import random
    >>> from networkx.readwrite import d3_js
    >>> G = nx.random_lobster(20, .8, .8)
    >>> low = 0
    >>> high = 5
    >>> G.add_nodes_from(map(lambda i: (i, {'group': random.random_integers(low, high, 1)[0]}), G.nodes()))
    >>> G.add_edges_from(map(lambda e: (e[0], e[1], {'weight': random.random_integers(low+1, high, 1)[0]}), G.edges()))
    >>> d3_js.export_d3_js(G, files_dir="random_lobster", graphname="random_lobster_graph", node_labels=False)
    """
    if not os.path.exists(files_dir):
        os.makedirs(files_dir)

    # Begin by creating the necessary JS and HTML files
    write_d3_js(G, path=files_dir+"/"+graphname+".json", group=group,
                encoding=encoding)

    # Template reference -> reference relative to the exported folder.
    # str.replace() is a no-op when the pattern is absent, so no find()
    # guard is needed (the former "find(...) > 0" guard additionally missed
    # a match at the very start of a line).
    replacements = (
        ('"../../d3.js"', '"d3/d3.js"'),
        ('"../../d3.geom.js"', '"d3/d3.geom.js"'),
        ('"../../d3.layout.js"', '"d3/d3.layout.js"'),
        ('"force.css"', '"d3/force.css"'),
        ('"force.js"', '"'+graphname+'.js"'),
    )
    html = d3_html
    for old, new in replacements:
        html = html.replace(old, new)
    # Writing every "\n"-separated line followed by "\n" is the same as
    # writing the whole template plus one trailing newline.
    html += '\n'

    # Encode the complete document exactly once. Previously only the '\n'
    # literal was encoded (".encode" binds tighter than "+"), so the page
    # text itself never went through ``encoding``. Binary mode because the
    # payload is already encoded bytes; "with" closes the file on errors.
    with open(files_dir+'/'+graphname+'.html', 'wb') as graph_force_html:
        graph_force_html.write(html.encode(encoding))
argv.append(get_mailfolder() + "*") for arg in argv[1:]: for filename in glob(arg): # If works, document is well-formed try: parseFile(filename) # If exception occurs, document is not well-formed; add to # collection of invalid docs. except Exception, e: add_invalid_docs(filename, str(e)) print filename continue # Prepare XML file to write invalid input XML files of the # collection into. invalid_xml_filehandler = open(get_invalid_xml_filename(), "w", get_def_enc()) invalid_xmldoc = Doc() invalid_xmldocs = invalid_xmldoc.createElement("invalid_xmldocsection") invalid_xmldoc.appendChild(invalid_xmldocs) invalidstat = defaultdict(int) # Check collection of invalid docs and effectively write XML # invalid file. for err, no in invalidstat.items(): print err + " : " + str(no) invalid_xml_filehandler.write(invalid_xmldoc.toprettyxml()) invalid_xml_filehandler.close() if len(invalidstat.values()) == 0: print "No XML errors found in " + get_mailfolder() else: print "XML file with detailed error info written to " \