Example #1
0
    def write_edges(self, fh, edge_func):
        """Write every row of ``self.edges_query`` to ``fh`` as ``*Arcs``.

        Each row is unpacked as ``(_to, _from, _weight)``. Both ids are
        translated through ``self.map`` and written as one CRLF-terminated
        line of the form ``"<to> <from> <weight>"``. A falsy weight
        (``None`` or ``0``) is written as ``1``.

        A ``.`` is echoed to stdout every 1000 rows; once the cursor has
        been closed a summary with the row count and elapsed seconds is
        printed.

        Args:
            fh: Object whose ``write`` method receives the output text.
            edge_func: Accepted for interface compatibility; not used here.
        """
        c = SSCursor(self.dbh)
        _start = time.time()
        count = 0

        # The cursor is released in ``finally`` so a failing query, a missing
        # key in ``self.map`` or a write error cannot leave it open.
        try:
            c.execute(self.edges_query)

            # write the header:
            fh.write("*Arcs\r\n")

            # NOTE: there is no duplicate protection; repeated rows are
            # written as-is.
            res = c.fetchone()
            while res:
                (_to, _from, _weight) = res
                count += 1
                # could call external func here
                fh.write("%s %s %f\r\n" % (self.map[_to], self.map[_from],
                                           (_weight or 1)))
                if count % 1000 == 0:
                    # progress marker
                    sys.stdout.write('.')
                    sys.stdout.flush()

                # get next record
                res = c.fetchone()
        finally:
            c.close()

        # Single-argument parenthesised form: prints the same text under the
        # Python 2 print statement and also parses as a Python 3 call.
        print("\nWrote %i Edges in %f\n" % (count, time.time() - _start))
Example #2
0
  def write_edges(self, fh, edge_func):
    """Dump the result of ``self.edges_query`` into ``fh`` as an arc list.

    Output starts with the ``*Arcs`` header line. Every fetched row is
    unpacked as ``(_to, _from, _weight)`` and emitted as
    ``"<self.map[_to]> <self.map[_from]> <weight>"`` terminated by CRLF.
    When the weight is falsy (``None`` or ``0``) the value ``1`` is
    written instead.

    Progress is reported on stdout with one ``.`` per 1000 rows, followed
    by a final line giving the number of rows and the elapsed seconds.

    Args:
        fh: Object whose ``write`` method receives the output text.
        edge_func: Not used by this implementation; kept so the signature
            stays unchanged for callers.
    """
    c = SSCursor(self.dbh)
    _start = time.time()
    count = 0

    try:
      c.execute(self.edges_query)

      # write the header:
      fh.write("*Arcs\r\n")

      # NOTE: no duplicate protection yet; every fetched row is written.
      res = c.fetchone()
      while res:
        (_to, _from, _weight) = res
        count += 1
        # could call external func here
        fh.write("%s %s %f\r\n" % (self.map[_to], self.map[_from], (_weight or 1)))
        if count % 1000 == 0:
          sys.stdout.write('.')
          sys.stdout.flush()

        # get next record
        res = c.fetchone()
    finally:
      # Always release the cursor, even when the query, the id lookup or
      # the write raises part-way through.
      c.close()

    # Parenthesised single argument: same output under the Python 2 print
    # statement, and valid syntax on Python 3 as well.
    print("\nWrote %i Edges in %f\n" % (count, time.time() - _start))
Example #3
0
  def write_edges(self, fh, edge_func):
    """Write the rows of ``self.edges_query`` to ``fh`` as ``*Edges``.

    Rows are unpacked as ``(_to, _from, _weight)``. The two ids are ordered
    (smaller first) and used as indices into ``self._biglist.matrix``; a
    cell equal to ``1`` means the pair was already written, so the row is
    skipped. Otherwise the cell is set to ``1`` and the line
    ``"<self.map[_to]> <self.map[_from]> <weight>"`` (CRLF-terminated) is
    written. A falsy weight (``None`` or ``0``) is written as ``1``.

    The matrix cells are not cleared by this method.

    A ``.`` is echoed to stdout for every 50 edges written, and a summary
    with the number of edges written and the elapsed seconds is printed
    at the end.

    Args:
        fh: Object whose ``write`` method receives the output text.
        edge_func: Accepted for interface compatibility; not used here.
    """
    c = SSCursor(self.dbh)
    _start = time.time()

    # Number of edges actually written; skipped duplicates are not counted,
    # so the closing "Wrote ..." message matches the file contents.
    count = 0

    try:
      c.execute(self.edges_query)

      # write the header:
      fh.write("*Edges\r\n")

      res = c.fetchone()
      while res:
        (_to, _from, _weight) = res

        # Order the pair so (a, b) and (b, a) land on the same matrix cell.
        if _to > _from:
          lo, hi = _from, _to
        else:
          lo, hi = _to, _from

        already_written = self._biglist.matrix[lo][hi] == 1
        if not already_written:
          self._biglist.matrix[lo][hi] = 1

          # could call external func here
          fh.write("%s %s %f\r\n" % (self.map[_to], self.map[_from], (_weight or 1)))
          count += 1
          if count % 50 == 0:
            sys.stdout.write('.')
            sys.stdout.flush()

        # get next record (single fetch point for written and skipped rows)
        res = c.fetchone()
    finally:
      # Release the cursor even if the query, a lookup or a write fails.
      c.close()

    # Parenthesised single argument: same output under the Python 2 print
    # statement, and valid syntax on Python 3 as well.
    print("\nWrote %i Edges in %f\n" % (count, time.time() - _start))
Example #4
0
 def import_data(self, limit=None):
     '''Import etsa database to given document and segment storage.

     Runs ``self._pre_import_data_hook()``, executes the query returned by
     ``self._get_query(limit)`` and passes each fetched row to
     ``self._process_single``. The number of processed rows is logged at
     the end.

     Keyword arguments:
     limit - if not None, then import only `limit` number of first documents.
             A limit of 0 (or less) imports nothing.
     '''
     self.logger.info('Importing ETSA data. Limit is ' + str(limit))

     self._pre_import_data_hook()

     numprocessed = 0
     cur = SSCursor(self._conn)
     # Close the cursor on every exit path: normal completion, the early
     # stop at `limit` (which can leave unread rows) and exceptions raised
     # while processing a row.
     try:
         cur.execute(self._get_query(limit))

         result = cur.fetchone()
         while result is not None:
             # Check the limit before processing so that limit=0 does not
             # import a document.
             if limit is not None and numprocessed >= limit:
                 break
             self._process_single(result)
             numprocessed += 1
             result = cur.fetchone()
             # TODO: add multithreading
     finally:
         cur.close()

     self.logger.info('Processed {0} documents!'.format(numprocessed))