def testManyDuplicationsInOrder1234(self):
    """
    A list with duplicates of all of 1 2 3 4 (introduced in that order)
    should become the list 1 2 3 4 with only one of each.
    """
    duplicated = [1, 2, 1, 2, 3, 1, 2, 4, 1, 3, 2]
    # assertEqual is used because assertEquals is a deprecated alias.
    self.assertEqual([1, 2, 3, 4], uniqueList(duplicated))
def testManyDuplicationsInOrder4321(self):
    """
    A list with duplicates of all of 4 3 2 1 (introduced in that order)
    should become the list 4 3 2 1 with only one of each.
    """
    duplicated = [4, 4, 3, 4, 3, 2, 3, 4, 4, 3, 1, 2, 4]
    # assertEqual is used because assertEquals is a deprecated alias.
    self.assertEqual([4, 3, 2, 1], uniqueList(duplicated))
def extractAtnames(comment):
    """Collect the distinct @names mentioned in a comment.

    @param comment: The C{unicode} comment text.
    @return: A C{list} of every match of C{ATNAME_REGEX} in the comment,
        with duplicates removed and the order of first appearance kept.
    """
    matches = findall(ATNAME_REGEX, comment)
    return uniqueList(matches)
def extractFiles(comment):
    """Collect the distinct file references mentioned in a comment.

    @param comment: The C{unicode} comment text.
    @return: A C{list} of every match of C{FILE_REGEX} in the comment,
        with duplicates removed and the order of first appearance kept.
    """
    matches = findall(FILE_REGEX, comment)
    return uniqueList(matches)
def extractPlustags(comment):
    """Collect the distinct +plustags mentioned in a comment.

    @param comment: The C{unicode} comment text.
    @return: A C{list} of every match of C{PLUSTAG_REGEX} in the comment,
        with duplicates removed and the order of first appearance kept.
    """
    matches = findall(PLUSTAG_REGEX, comment)
    return uniqueList(matches)
def testNoDuplicates(self):
    """A list with no duplicates should be untouched."""
    # assertEqual is used because assertEquals is a deprecated alias.
    self.assertEqual(['hey', 'you', 'guys'],
                     uniqueList(['hey', 'you', 'guys']))
def testSimpleDuplicate(self):
    """A list of two identical things gets reduced to length one."""
    # assertEqual is used because assertEquals is a deprecated alias.
    self.assertEqual(['hey'], uniqueList(['hey', 'hey']))
def testEmpty(self):
    """The empty list is the empty list when uniqued."""
    # assertEqual is used because assertEquals is a deprecated alias.
    self.assertEqual([], uniqueList([]))
def create(self, text, username, about=None, importer=None, when=None,
           url=None):
    """Create a new comment.

    A comment object is created, its C{fluidinfo.com/info/*} tag values
    are stored, and the comment is linked (via C{createComment}) to the
    objects for every about value, creating those objects when they do
    not exist yet.

    @param text: The C{unicode} comment text.
    @param username: the C{unicode} username of the commenter.
    @param about: Optionally, a C{list} of C{unicode} values the comment is
        about.
    @param importer: A C{unicode} string giving the name of the importer.
        Defaults to C{u'fluidinfo.com'} when not given.
    @param when: A C{datetime.datetime} instance or C{None} if the
        current time should be used.
    @param url: A C{str} URL or C{None} if there is no URL associated with
        this comment, in which case a C{https://fluidinfo.com/comment/...}
        URL is built from the importer, username and ISO timestamp.
    @raise L{FeatureError}: if (1) the comment text is C{None} or is all
        whitespace, or (2) if the importer name contains the separator
        (space) that we use in the about value for comment objects.
    @return: A C{dict} as follows:
        {
            fluidinfo.com/info/about: A C{list}, without duplicates, of
                the explicit about values (stripped, and lowercased
                unless they match C{URL_REGEX}) followed by the values
                C{self._extractAbouts} finds in the comment text.
            fluidinfo.com/info/text: The C{text}, as received.
            fluidinfo.com/info/timestamp: The C{float} UTC timestamp
                (seconds since the epoch, with the microseconds as the
                fractional part) the comment was created at.
            fluidinfo.com/info/url: The C{url}, as received or the
                generated default.
            fluidinfo.com/info/username: The C{username}, as received.
        }
    """
    if not text or text.strip() == '':
        raise FeatureError('Comment text non-existent or just whitespace.')

    if not importer:
        importer = u'fluidinfo.com'
    elif ' ' in importer:
        raise FeatureError('Comment importer name contains a space.')

    if not when:
        when = datetime.utcnow()

    # Whole seconds come from timegm, the fractional part from the
    # microseconds rendered as '0.xxxxxx'.
    wholeSeconds = timegm(when.utctimetuple())
    fraction = float(when.strftime('0.%f'))
    floatTime = wholeSeconds + fraction
    isoTime = when.isoformat()

    if not url:
        url = 'https://fluidinfo.com/comment/%s/%s/%s' % (
            importer, username, isoTime)

    # Gather the explicit about values first: each one is stripped and,
    # unless it is a URL, lowercased. Values found in the text follow.
    abouts = []
    if about:
        for stripped in map(unicode.strip, about):
            if URL_REGEX.match(stripped):
                abouts.append(stripped)
            else:
                abouts.append(stripped.lower())
    abouts.extend(self._extractAbouts(text))
    abouts = uniqueList(abouts)

    # The comment object's own about value is 'importer username isoTime',
    # which is why the importer must not contain a space.
    commentID = self._objects.create(
        u'%s %s %s' % (importer, username, isoTime))

    values = {}
    values[u'fluidinfo.com/info/about'] = abouts
    values[u'fluidinfo.com/info/username'] = username
    values[u'fluidinfo.com/info/text'] = text
    values[u'fluidinfo.com/info/url'] = url
    values[u'fluidinfo.com/info/timestamp'] = floatTime
    self._tagValues.set({commentID: values})

    if not abouts:
        return values

    # Get all the object IDs of the target objects. If an object does
    # not exist, create it.
    result = getAboutTagValues(values=abouts)
    existingObjects = dict(
        result.values(AboutTagValue.value, AboutTagValue.objectID))
    wanted = set(abouts)
    known = set(existingObjects.iterkeys())
    for aboutValue in wanted - known:
        existingObjects[aboutValue] = self._objects.create(aboutValue)

    createComment(commentID, existingObjects.values(), username, when)
    return values
def testDuplicatesWithInterveningElement(self):
    """A list with elements A B A should become just A B."""
    # assertEqual is used because assertEquals is a deprecated alias.
    self.assertEqual(['A', 'B'], uniqueList(['A', 'B', 'A']))
def create(self, text, username, about=None, importer=None, when=None,
           url=None):
    """Create a new comment.

    Stores the comment as a new object with C{fluidinfo.com/info/*} tag
    values and registers it, through C{createComment}, against the
    objects of all its about values (missing objects are created).

    @param text: The C{unicode} comment text.
    @param username: the C{unicode} username of the commenter.
    @param about: Optionally, a C{list} of C{unicode} values the comment is
        about.
    @param importer: A C{unicode} string giving the name of the importer;
        C{u'fluidinfo.com'} is used when none is given.
    @param when: A C{datetime.datetime} instance or C{None} if the
        current time should be used.
    @param url: A C{str} URL or C{None} if there is no URL associated with
        this comment; a default comment URL is generated in that case.
    @raise L{FeatureError}: if (1) the comment text is C{None} or is all
        whitespace, or (2) if the importer name contains the separator
        (space) that we use in the about value for comment objects.
    @return: A C{dict} as follows:
        {
            fluidinfo.com/info/about: A duplicate-free C{list} holding
                the explicit about values (stripped; lowercased unless
                they match C{URL_REGEX}) and then the values returned by
                C{self._extractAbouts} for the comment text.
            fluidinfo.com/info/text: The C{text}, as received.
            fluidinfo.com/info/timestamp: The C{float} UTC timestamp
                (seconds since the epoch, microseconds in the fraction)
                the comment was created at.
            fluidinfo.com/info/url: The C{url}, as received or the
                generated default.
            fluidinfo.com/info/username: The C{username}, as received.
        }
    """
    def normalize(item):
        # Strip the value and lowercase it unless it looks like a URL.
        item = unicode.strip(item)
        return item if URL_REGEX.match(item) else item.lower()

    if not text or text.strip() == '':
        raise FeatureError('Comment text non-existent or just whitespace.')
    if importer and ' ' in importer:
        raise FeatureError('Comment importer name contains a space.')

    importer = importer or u'fluidinfo.com'
    when = when or datetime.utcnow()
    isoTime = when.isoformat()
    floatTime = timegm(when.utctimetuple()) + float(when.strftime('0.%f'))
    url = url or 'https://fluidinfo.com/comment/%s/%s/%s' % (
        importer, username, isoTime)

    # Explicit about values come first, then the ones found in the text;
    # duplicates are dropped while keeping the first occurrence.
    abouts = [normalize(item) for item in about] if about else []
    abouts.extend(self._extractAbouts(text))
    abouts = uniqueList(abouts)

    commentObjectAbout = u'%s %s %s' % (importer, username, isoTime)
    commentID = self._objects.create(commentObjectAbout)
    values = dict([
        (u'fluidinfo.com/info/about', abouts),
        (u'fluidinfo.com/info/username', username),
        (u'fluidinfo.com/info/text', text),
        (u'fluidinfo.com/info/url', url),
        (u'fluidinfo.com/info/timestamp', floatTime),
    ])
    self._tagValues.set({commentID: values})

    if abouts:
        # Look up the object IDs of the target objects, creating any
        # object that does not exist yet.
        rows = getAboutTagValues(values=abouts).values(
            AboutTagValue.value, AboutTagValue.objectID)
        existingObjects = dict(rows)
        missingAbouts = set(abouts) - set(existingObjects.iterkeys())
        for aboutValue in missingAbouts:
            existingObjects[aboutValue] = self._objects.create(aboutValue)
        createComment(commentID, existingObjects.values(), username, when)

    return values