예제 #1
0
def import_rows():
    """Convert the raw observation file into a stream of serialized rows.

    Reads three integer header lines from RAW (document count, word count
    and observation count, in that order) and reports them.  Every
    following line is split into ``doc feature count``; consecutive lines
    sharing the same ``doc`` token are gathered into a single Row, which is
    written to DIFFS with ``protobuf_stream_write``.

    Each feature is appended to both halves of the row's diff: as True in
    ``pos`` and as False in ``neg``.  The per-line count is parsed only to
    validate the line shape; its value is not used.

    The sparse index lists are sorted before every write, including the
    last row, and nothing is written when the input holds no observations.
    """
    row = Row()
    pos = row.diff.pos
    neg = row.diff.neg
    pos.observed.sparsity = ProductValue.Observed.SPARSE
    neg.observed.sparsity = ProductValue.Observed.SPARSE

    def write_row(outfile):
        # Single flush path so the last row is sorted like all the others.
        pos.observed.sparse.sort()
        neg.observed.sparse.sort()
        protobuf_stream_write(row.SerializeToString(), outfile)

    with open_compressed(RAW) as infile:
        # next(f) and single-argument print(...) behave the same on
        # Python 2 and Python 3.
        doc_count = int(next(infile))
        word_count = int(next(infile))
        observed_count = int(next(infile))
        print('Importing {} observations of {} words in {} documents'.format(
            observed_count, word_count, doc_count))
        with open_compressed(DIFFS, 'wb') as outfile:
            current_doc = None
            for line in infile:
                doc, feature, _count = line.split()
                if doc != current_doc:
                    if current_doc is not None:
                        write_row(outfile)
                        print_dot(every=1000)
                    # Start a new row: the Row message is reused, so its
                    # repeated fields are cleared in place.
                    current_doc = doc
                    row.id = int(doc)
                    del pos.booleans[:]
                    del pos.observed.sparse[:]
                    del neg.booleans[:]
                    del neg.observed.sparse[:]
                # Presumably converts a 1-based feature id to a 0-based
                # index -- confirm against the raw data format.
                feature = int(feature) - 1
                pos.observed.sparse.append(feature)
                pos.booleans.append(True)
                neg.observed.sparse.append(feature)
                neg.booleans.append(False)
            # Flush the trailing row, but only if at least one observation
            # line was read; otherwise the row was never populated.
            if current_doc is not None:
                write_row(outfile)
예제 #2
0
파일: main.py 프로젝트: jostheim/loom
def import_rows():
    """Import rows from the raw data file into the DIFFS stream.

    RAW starts with three integer lines (document count, word count,
    observation count) followed by ``doc feature count`` triples.  Lines
    are grouped by consecutive equal ``doc`` tokens; each group becomes one
    Row whose diff lists every feature as True in ``pos`` and False in
    ``neg``.  Rows are serialized and appended to DIFFS through
    ``protobuf_stream_write``.

    The ``count`` column is unpacked but ignored.  Sparse index lists are
    sorted before each row is written -- the final row included -- and an
    input without observation lines produces no rows.
    """
    row = Row()
    pos = row.diff.pos
    neg = row.diff.neg
    pos.observed.sparsity = ProductValue.Observed.SPARSE
    neg.observed.sparsity = ProductValue.Observed.SPARSE

    def flush(outfile):
        # Shared by the in-loop and end-of-input writes so both sort first.
        pos.observed.sparse.sort()
        neg.observed.sparse.sort()
        protobuf_stream_write(row.SerializeToString(), outfile)

    with open_compressed(RAW) as infile:
        # Builtin next() and parenthesised single-argument print work
        # unchanged on both Python 2 and Python 3.
        doc_count = int(next(infile))
        word_count = int(next(infile))
        observed_count = int(next(infile))
        print('Importing {} observations of {} words in {} documents'.format(
            observed_count,
            word_count,
            doc_count))
        with open_compressed(DIFFS, 'wb') as outfile:
            current_doc = None
            for line in infile:
                doc, feature, _count = line.split()
                if doc != current_doc:
                    if current_doc is not None:
                        flush(outfile)
                        print_dot(every=1000)
                    current_doc = doc
                    row.id = int(doc)
                    # The same Row object is reused for every document, so
                    # empty its repeated fields before refilling them.
                    del pos.booleans[:]
                    del pos.observed.sparse[:]
                    del neg.booleans[:]
                    del neg.observed.sparse[:]
                # Shift the id down by one (looks like 1-based -> 0-based;
                # verify against the data set's documentation).
                feature = int(feature) - 1
                pos.observed.sparse.append(feature)
                pos.booleans.append(True)
                neg.observed.sparse.append(feature)
                neg.booleans.append(False)
            # Write the last accumulated row; skip when no line was read,
            # since the row would be unpopulated.
            if current_doc is not None:
                flush(outfile)
예제 #3
0
파일: query.py 프로젝트: jostheim/loom
 def send(self, request):
     """Serialize ``request`` and write it to the child process's stdin.

     ``request`` must be a ``Query.Request``; anything else trips the
     assertion.  The pipe is flushed after the write.
     """
     assert isinstance(request, Query.Request), request
     payload = request.SerializeToString()
     pipe = self.proc.stdin
     protobuf_stream_write(payload, pipe)
     pipe.flush()
예제 #4
0
파일: query.py 프로젝트: edwardt/loom
 def call_string(self, request_string):
     """Write an already-serialized request to the child process's stdin.

     NOTE(review): nothing is flushed or read back here -- confirm that the
     caller takes care of both.
     """
     child_stdin = self.proc.stdin
     protobuf_stream_write(request_string, child_stdin)
예제 #5
0
파일: util.py 프로젝트: dlovell/loom
 def call_string(self, request_string):
     """Send a serialized request to the child and return its raw reply.

     Writes ``request_string`` to the child process's stdin with
     ``protobuf_stream_write``, flushes the pipe, then returns whatever
     ``protobuf_stream_read`` yields from the child's stdout.
     """
     protobuf_stream_write(request_string, self.proc.stdin)
     # Flush before blocking on the reply: if stdin is buffered the request
     # may never reach the child, and the read below would wait forever.
     self.proc.stdin.flush()
     return protobuf_stream_read(self.proc.stdout)
예제 #6
0
파일: query.py 프로젝트: truell20/loom
 def send(self, request):
     """Push one ``Query.Request`` to the child process over stdin.

     The request is type-checked with an assertion, serialized, written
     with ``protobuf_stream_write``, and the pipe is then flushed.
     """
     assert isinstance(request, Query.Request), request
     serialized = request.SerializeToString()
     stdin = self.proc.stdin
     protobuf_stream_write(serialized, stdin)
     stdin.flush()