def write_edges(self, fh, edge_func): c = SSCursor(self.dbh) _start = time.time() c.execute(self.edges_query) # write the header: fh.write("*Arcs\r\n") # I need some duplicate protection count = 0 res = c.fetchone() while (res): (_to, _from, _weight) = res count += 1 # could call external func here fh.write("%s %s %f\r\n" % (self.map[_to], self.map[_from], (_weight or 1))) if (count % 1000 == 0): sys.stdout.write('.') sys.stdout.flush() # get next record res = c.fetchone() # end this function c.close() print "\nWrote %i Edges in %f\n" % (count, time.time() - _start)
def write_edges(self, fh, edge_func): c = SSCursor(self.dbh) _start = time.time() c.execute(self.edges_query) # write the header: fh.write("*Arcs\r\n") # I need some duplicate protection count = 0 res = c.fetchone() while( res): (_to, _from, _weight) = res count +=1 # could call external func here fh.write("%s %s %f\r\n" %(self.map[_to], self.map[_from], (_weight or 1))) if(count % 1000 == 0): sys.stdout.write('.') sys.stdout.flush() # get next record res = c.fetchone() # end this function c.close() print "\nWrote %i Edges in %f\n" %(count, time.time() - _start)
def write_edges(self, fh, edge_func): c = SSCursor(self.dbh) _start = time.time() c.execute(self.edges_query) # write the header: fh.write("*Edges\r\n") # I need some duplicate protection count = 0 res = c.fetchone() while( res): (_to, _from, _weight) = res count +=1 # check it: min = _to max = _from if(min > max): min = _from max = _to if(self._biglist.matrix[min][max] == 1): res = c.fetchone() continue else: self._biglist.matrix[min][max] = 1 # could call external func here fh.write("%s %s %f\r\n" %(self.map[_to], self.map[_from], (_weight or 1))) if(count % 50 == 0): sys.stdout.write('.') sys.stdout.flush() # get next record res = c.fetchone() # end this function c.close() print "\nWrote %i Edges in %f\n" %(count, time.time() - _start)
def import_data(self, limit=None):
    '''Import etsa database to given document and segment storage.

    Keyword arguments:
    limit - if not None, then import only `limit` number of first documents.
    '''
    self.logger.info('Importing ETSA data. Limit is ' + str(limit))
    self._pre_import_data_hook()

    cursor = SSCursor(self._conn)
    cursor.execute(self._get_query(limit))

    processed = 0
    # iter(callable, sentinel) pulls rows until fetchone() returns None.
    for row in iter(cursor.fetchone, None):
        self._process_single(row)
        processed += 1
        # stop early once the requested document limit is reached
        if limit is not None and processed >= limit:
            break
    # TODO: add multithreading
    self.logger.info('Processed {0} documents!'.format(processed))