import os
import sys
import gzip
import cPickle
from math import log

# Lexicon, Tensor, PERSP_TYPES and PERSP2PIVDIM are assumed to be provided
# by a companion module of this package (the corresponding import is not
# shown in this excerpt).


class MemStore:

    def __init__(self,trace=False):
        self.lexicon = Lexicon()
        self.sources = Tensor(rank=4)
        self.corpus = Tensor(rank=3)
        self.perspectives = dict([(x,Tensor(rank=2)) for x in PERSP_TYPES])
        self.types = {}
        self.synonyms = {}
        self.trace = trace

    def convert(self,statement):
        """
        Backwards-compatibility function for converting between integer and
        string representations of statements.
        """

        return tuple([self.lexicon[x] for x in statement])

    def incorporate(self,path,ext='.tsv'):
        """
        Imports statements into the store, processing all files with the
        specified extension ext in the path location. The lexicon and
        sources structures are updated (not overwritten) in the process.
        """

        # first pass to update the lexicon
        expressions = []
        for fname in [os.path.join(path,x) for x in os.listdir(path) if \
        os.path.isfile(os.path.join(path,x)) and \
        os.path.splitext(x)[-1].lower() == ext.lower()]:
            for line in open(fname,'r'):
                try:
                    s,p,o,prov,rel = line.split('\t')[:5]
                    # the relevance is parsed here only to validate the line
                    rel = float(rel)
                    expressions += [s,p,o,prov]
                except:
                    sys.stderr.write('W (loading memory-based store) - '+\
                        'something wrong with line:\n%s' % (line,))
        self.lexicon.update(expressions)
        # second pass to update the sources tensor
        for fname in [os.path.join(path,x) for x in os.listdir(path) if \
        os.path.isfile(os.path.join(path,x)) and \
        os.path.splitext(x)[-1].lower() == ext.lower()]:
            for line in open(fname,'r'):
                try:
                    s,p,o,prov,rel = line.split('\t')[:5]
                    rel = float(rel)
                    key = tuple([self.lexicon[x] for x in [s,p,o,prov]])
                    self.sources[key] = rel
                except:
                    sys.stderr.write('W (loading memory-based store) - '+\
                        'something wrong with line:\n%s' % (line,))

    def dump(self,filename):
        # straightforward (but somewhat slow) serialisation using cPickle
        f = open(filename,'wb')
        cPickle.dump(self,f)
        f.close()

    def load(self,filename):
        # straightforward (but somewhat slow) deserialisation using cPickle;
        # the unpickled attributes are copied over to this instance (plain
        # assignment to self would only rebind the local name)
        f = open(filename,'rb')
        loaded = cPickle.load(f)
        f.close()
        self.__dict__.update(loaded.__dict__)
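    # A hypothetical example of the five-column input format consumed by
    # incorporate() above (tabs shown as <TAB>; the vocabulary and weights
    # are made up purely for illustration):
    #
    #   dog<TAB>related_to<TAB>cat<TAB>document_042<TAB>0.85
    #
    # The first three columns form the statement, the fourth identifies its
    # provenance and the fifth gives the source's relevance weight.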
    def exp(self,path,compress=True,core_only=True):
        # exporting the whole store as tab-separated value files to a
        # directory (gzip compression is used by default)
        # note that only the lexicon, sources and corpus structures are
        # exported - any precomputed corpus perspectives have to be
        # re-created!
        # also, integer indices are used - for a lexicalised (human-readable)
        # export of sources and corpus, use the exportSources() and
        # exportCorpus() functions

        # setting the filenames
        lex_fn = os.path.join(path,'lexicon.tsv')
        src_fn = os.path.join(path,'sources.tsv')
        crp_fn = os.path.join(path,'corpus.tsv')
        if compress:
            lex_fn += '.gz'
            src_fn += '.gz'
            crp_fn += '.gz'
        opener, mode = open, 'w'
        if compress:
            opener, mode = gzip.open, 'wb'
        lex_f = opener(lex_fn,mode)
        src_f = opener(src_fn,mode)
        crp_f = opener(crp_fn,mode)
        self.lexicon.to_file(lex_f)
        self.sources.to_file(src_f)
        self.corpus.to_file(crp_f)
        lex_f.close()
        src_f.close()
        crp_f.close()

    def imp(self,path,compress=True):
        # importing the whole store from tab-separated value files in a
        # directory - effectively an inverse of the exp() function
        lex_fn = os.path.join(path,'lexicon.tsv')
        src_fn = os.path.join(path,'sources.tsv')
        crp_fn = os.path.join(path,'corpus.tsv')
        if compress:
            lex_fn += '.gz'
            src_fn += '.gz'
            crp_fn += '.gz'
        opener, mode = open, 'r'
        if compress:
            opener, mode = gzip.open, 'rb'
        lex_f = opener(lex_fn,mode)
        src_f = opener(src_fn,mode)
        crp_f = opener(crp_fn,mode)
        self.lexicon.from_file(lex_f)
        self.sources.from_file(src_f)
        self.corpus.from_file(crp_f)
        lex_f.close()
        src_f.close()
        crp_f.close()

    def computeCorpus(self):
        # number of all triples
        N = 0
        # x -> number of independent occurrences in the store
        indep_freq = {}
        # (x,y) -> number of joint occurrences in the store
        joint_freq = {}
        # (s,p,o) -> number of occurrences
        tripl_freq = {}
        # (s,p,o) -> (provenance, relevance)
        spo2pr = {}
        # going through all the statements in the sources
        for s,p,o,d in self.sources.keys():
            N += 1
            if s in indep_freq:
                indep_freq[s] += 1
            else:
                indep_freq[s] = 1
            if o in indep_freq:
                indep_freq[o] += 1
            else:
                indep_freq[o] = 1
            if (s,o) in joint_freq:
                joint_freq[(s,o)] += 1
            else:
                joint_freq[(s,o)] = 1
            if (s,p,o) in tripl_freq:
                tripl_freq[(s,p,o)] += 1
            else:
                tripl_freq[(s,p,o)] = 1
            if (s,p,o) not in spo2pr:
                spo2pr[(s,p,o)] = []
            spo2pr[(s,p,o)].append((d,self.sources[(s,p,o,d)]))
        # going only through the unique triples now, regardless of their
        # provenance
        for s,p,o in spo2pr:
            # a list of relevances of the particular statement's sources
            src_rels = [x[1] for x in spo2pr[(s,p,o)]]
            # joint frequency of the subject and object, in either order
            joint = joint_freq[(s,o)]
            if (o,s) in joint_freq:
                joint += joint_freq[(o,s)]
            # absolute frequency of the triple times its mutual information
            # score
            fMI = 0.0
            try:
                fMI = tripl_freq[(s,p,o)]*\
                    log(float(N*joint)/(indep_freq[s]*indep_freq[o]),2)
            except ValueError:
                continue
            # setting the corpus tensor value, weighted by the mean
            # relevance of the statement's sources
            self.corpus[(s,p,o)] = fMI*(float(sum(src_rels))/len(src_rels))

    def normaliseCorpus(self,cut_off=0.95,min_quo=0.1):
        # normalising the corpus by the weight at the cut_off quantile,
        # i.e. a value greater than or equal to the cut_off fraction of all
        # weight values; normalised values above 1 are clipped to 1, and
        # values below zero are set to the min_quo fraction of the minimal
        # positive weight
        ws = sorted(self.corpus.values())
        norm_cons = ws[int(cut_off*len(ws))]
        min_norm = min([x for x in ws if x > 0])*min_quo
        for key in self.corpus:
            w = self.corpus[key]/norm_cons
            if w < 0:
                w = min_norm
            if w > 1:
                w = 1.0
            self.corpus[key] = w

    def computePerspective(self,ptype):
        self.perspectives[ptype] = self.corpus.matricise(PERSP2PIVDIM[ptype])

    def indexSources(self):
        self.sources.index()

    def indexCorpus(self):
        self.corpus.index()

    def indexPerspective(self,ptype):
        self.perspectives[ptype].index()
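    # A worked illustration of the computeCorpus() weighting above (the
    # numbers are made up): with N = 100 statements overall, where the
    # subject s occurs 4 times, the object o occurs 5 times, the (s,o) pair
    # co-occurs 2 times and the full (s,p,o) triple occurs 2 times, we get
    #
    #   fMI = 2 * log2((100*2)/(4*5)) = 2 * log2(10) ~= 6.64
    #
    # which is then multiplied by the mean relevance of the triple's sources
    # to obtain the corpus weight.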
    def getProvenance(self,statement):
        # getting the statement elements
        s,p,o = statement
        # getting integer ID versions of the statement elements
        if type(s) in [unicode,str]:
            s = self.lexicon[s]
        if type(p) in [unicode,str]:
            p = self.lexicon[p]
        if type(o) in [unicode,str]:
            o = self.lexicon[o]
        # evaluating the query on the sources tensor and collating the
        # provenance identifiers from the matching keys
        return [x[3] for x, rel in self.sources.query((s,p,o,None))]

    def getRelevance(self,prov):
        if type(prov) in [unicode,str]:
            prov = self.lexicon[prov]
        return max([rel for x, rel in \
            self.sources.query((None,None,None,prov))])

    def exportSources(self,filename,lexicalised=True):
        # export the sources tensor to a file in a tab-separated value
        # format, either with integer or lexicalised keys
        f = open(filename,'w')
        if lexicalised:
            f.write('\n'.join(['\t'.join([self.lexicon[x] for x in \
                [s,p,o,d]]+[str(w)]) for (s,p,o,d),w in \
                self.sources.items()]))
        else:
            f.write('\n'.join(['\t'.join([str(x) for x in [s,p,o,d,w]]) for \
                (s,p,o,d),w in self.sources.items()]))
        f.close()

    def exportCorpus(self,filename,lexicalised=True):
        # export the corpus tensor to a file in a tab-separated value
        # format, either with integer or lexicalised keys
        f = open(filename,'w')
        if lexicalised:
            f.write('\n'.join(['\t'.join([self.lexicon[x] for x in \
                [s,p,o]]+[str(w)]) for (s,p,o),w in self.corpus.items()]))
        else:
            f.write('\n'.join(['\t'.join([str(x) for x in [s,p,o,w]]) for \
                (s,p,o),w in self.corpus.items()]))
        f.close()
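
# A minimal usage sketch of the store (the ./data directory and the output
# filename are hypothetical and serve only as an illustration):
if __name__ == '__main__':
    store = MemStore(trace=True)
    store.incorporate('./data')       # load *.tsv statement files
    store.computeCorpus()             # compute the fMI-based corpus weights
    store.normaliseCorpus()           # scale the weights into (0,1]
    store.exportCorpus('corpus.tsv')  # write the lexicalised corpus out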