def test_ccipca_equivalence(): array = np.random.random(size=(10, 10)) col_mean = np.mean(array, axis=1) row_mean = np.mean(array, axis=0) overall_mean = np.mean(array) zero_mean_array = array - col_mean[:, np.newaxis] - row_mean[ np.newaxis, :] + overall_mean print zero_mean_array print np.mean(zero_mean_array, axis=0) print np.mean(zero_mean_array, axis=1) norm_array = normalize(zero_mean_array) U, Sigma, Vt = np.linalg.svd(norm_array) ccipca = CCIPCA(6, vector_size=10, amnesia=3.0) for iteration in xrange(500): for column in xrange(10): ccipca.fixed_iteration(norm_array[:, column], learn=True) a = ccipca._v[1:6] * 10 b = (U * Sigma * Sigma).T[:5] print(np.abs(a / b)) print Sigma[:5] print rmse(np.abs(a / b) - 1)
def __init__(self, transfreq=1, cnetfreq=2, spicefreq=10, k=20, filters=None): self.ccipca = CCIPCA(k, amnesia=2.0, remembrance=1000000.0) self.filters = filters self.iteration = 0 self.touchpoints = [] self.categories = {} self.transfreq = transfreq self.cnet = utils.feature_cycle(cthing) self.cnetfreq = cnetfreq self.spice = utils.feature_cycle(athing) self.spicefreq = spicefreq
def test_ccipca_equivalence(): array = np.random.random(size=(10,10)) col_mean = np.mean(array, axis=1) row_mean = np.mean(array, axis=0) overall_mean = np.mean(array) zero_mean_array = array - col_mean[:, np.newaxis] - row_mean[np.newaxis,:] + overall_mean print zero_mean_array print np.mean(zero_mean_array, axis=0) print np.mean(zero_mean_array, axis=1) norm_array = normalize(zero_mean_array) U, Sigma, Vt = np.linalg.svd(norm_array) ccipca = CCIPCA(6, vector_size=10, amnesia=3.0) for iteration in xrange(500): for column in xrange(10): ccipca.fixed_iteration(norm_array[:,column], learn=True) a = ccipca._v[1:6] * 10 b = (U * Sigma * Sigma).T[:5] print (np.abs(a/b)) print Sigma[:5] print rmse(np.abs(a/b)-1)
class SocNOC(object): def __init__(self, transfreq=1, cnetfreq=2, spicefreq=10, k=20, filters=None): self.ccipca = CCIPCA(k, amnesia=2.0, remembrance=1000000.0) self.filters = filters self.iteration = 0 self.touchpoints = [] self.categories = {} self.transfreq = transfreq self.cnet = utils.feature_cycle(cthing) self.cnetfreq = cnetfreq self.spice = utils.feature_cycle(athing) self.spicefreq = spicefreq def process_feed(self, feeds): self.process_labeled_feed(utils.make_tuples(feeds, None)) def process_labeled_feed(self, feeds): for current, word in self.process_feed_list(feeds): self.process_post(self.process_feed_item(current), word) @staticmethod def process_feed_list(feeds): return utils.weave_streams(utils.make_tuples(feedparser.parse(x)['items'], y) for (x, y) in feeds) @staticmethod def process_feed_item(current): text = current.get('content', current.get('summary', None)) if text is None: return if isinstance(text, list): text = text[0] if isinstance(text, dict): text = text['value'] return utils.strip_tags(text).strip() def process_post(self, post, word=None): for text in tokenizer.tokenize(post): if text: self.ccipca_iter('%s // %s' % (text, word), text, extras=word) def receive_tweet(self, tweet, word=None): user = '******' + tweet['user']['screen_name'] text = user + ' ' + utils.strip_tags(tweet['text']) if not self.filters or any( filt in text.lower() for filt in self.filters ): self.ccipca_iter(text, text, extras=word) def ccipca_iter(self, text, baretext, extras=None): assertion = utils.make_twit_vec(baretext, extras=extras) if assertion is not None: if self.cnetfreq and self.iteration % self.cnetfreq == 0: self._ccipca_iter(*self.cnet.next()) if self.spicefreq and self.iteration % self.spicefreq == 0: self._ccipca_iter(*self.spice.next()) self._ccipca_iter(text, assertion) self.iteration += 1 def _ccipca_iter(self, text, assertion): print 'iteration is really', self.iteration, "don't believe the following:", reconstructed = self.ccipca.iteration(assertion, True) if not ( self.transfreq and self.iteration % self.transfreq == 0 ): return sqnorm = numpy.sum(self.ccipca._v * self.ccipca._v, axis=1) concepts = {} for word in itertools.chain(( key[0] for key in assertion.keys() ), self.touchpoints): try: loc = self.ccipca._labels.index(word, touch=False) pos = self.ccipca._v[:,loc] concepts[word] = list(pos) except IndexError: pass categories = {} for catname, category in self.categories.items(): catvec = numpy.zeros(self.ccipca._v.shape[1]) for entry, value in category.items(): if entry in self.ccipca._labels: catvec[self.ccipca._labels.index(entry)] = value categories[catname] = list(numpy.dot(catvec, self.ccipca._v.T) * sqnorm) self.send({ 'text' : unicode(text), 'coordinates' : list(reconstructed), 'magnitudes' : list(numpy.sqrt(sqnorm)), 'concepts' : concepts, 'categories' : categories }) def send(self, data): # override this method print simplejson.dumps(data)
class SocNOC(object): def __init__(self, transfreq=1, cnetfreq=2, spicefreq=10, k=20, filters=None): self.ccipca = CCIPCA(k, amnesia=2.0, remembrance=1000000.0) self.filters = filters self.iteration = 0 self.touchpoints = [] self.categories = {} self.transfreq = transfreq self.cnet = utils.feature_cycle(cthing) self.cnetfreq = cnetfreq self.spice = utils.feature_cycle(athing) self.spicefreq = spicefreq def process_feed(self, feeds): self.process_labeled_feed(utils.make_tuples(feeds, None)) def process_labeled_feed(self, feeds): for current, word in self.process_feed_list(feeds): self.process_post(self.process_feed_item(current), word) @staticmethod def process_feed_list(feeds): return utils.weave_streams( utils.make_tuples(feedparser.parse(x)['items'], y) for (x, y) in feeds) @staticmethod def process_feed_item(current): text = current.get('content', current.get('summary', None)) if text is None: return if isinstance(text, list): text = text[0] if isinstance(text, dict): text = text['value'] return utils.strip_tags(text).strip() def process_post(self, post, word=None): for text in tokenizer.tokenize(post): if text: self.ccipca_iter('%s // %s' % (text, word), text, extras=word) def receive_tweet(self, tweet, word=None): user = '******' + tweet['user']['screen_name'] text = user + ' ' + utils.strip_tags(tweet['text']) if not self.filters or any(filt in text.lower() for filt in self.filters): self.ccipca_iter(text, text, extras=word) def ccipca_iter(self, text, baretext, extras=None): assertion = utils.make_twit_vec(baretext, extras=extras) if assertion is not None: if self.cnetfreq and self.iteration % self.cnetfreq == 0: self._ccipca_iter(*self.cnet.next()) if self.spicefreq and self.iteration % self.spicefreq == 0: self._ccipca_iter(*self.spice.next()) self._ccipca_iter(text, assertion) self.iteration += 1 def _ccipca_iter(self, text, assertion): print 'iteration is really', self.iteration, "don't believe the following:", reconstructed = self.ccipca.iteration(assertion, True) if not (self.transfreq and self.iteration % self.transfreq == 0): return sqnorm = numpy.sum(self.ccipca._v * self.ccipca._v, axis=1) concepts = {} for word in itertools.chain((key[0] for key in assertion.keys()), self.touchpoints): try: loc = self.ccipca._labels.index(word, touch=False) pos = self.ccipca._v[:, loc] concepts[word] = list(pos) except IndexError: pass categories = {} for catname, category in self.categories.items(): catvec = numpy.zeros(self.ccipca._v.shape[1]) for entry, value in category.items(): if entry in self.ccipca._labels: catvec[self.ccipca._labels.index(entry)] = value categories[catname] = list( numpy.dot(catvec, self.ccipca._v.T) * sqnorm) self.send({ 'text': unicode(text), 'coordinates': list(reconstructed), 'magnitudes': list(numpy.sqrt(sqnorm)), 'concepts': concepts, 'categories': categories }) def send(self, data): # override this method print simplejson.dumps(data)