Exemple #1
0
def glossary(request):
    """Render the glossary page for the mission attached to ``request``.

    Glossary entries are fetched through ``Glossary.Query`` using the
    request's Redis connection and mission name, ordered by their ``abbr``
    attribute, and handed to the ``glossary/glossary.html`` template under
    the ``terms`` context key.
    """
    query = Glossary.Query(request.redis_conn, request.mission.name)

    # Materialise the entries, then order them in place by abbreviation.
    terms = list(query.items())
    terms.sort(key=lambda entry: entry.abbr)

    context = {'terms': terms}
    return render_to_response(
        'glossary/glossary.html',
        context,
        context_instance=RequestContext(request),
    )
Exemple #2
0
def glossary_link(match, request):
    """Turn a glossary markup match into an HTML snippet.

    ``match`` is a regex match whose group 1 is the glossary term and whose
    optional group 2 is the text to display (``[glossary:term|display]``);
    when there is no usable group 2 the term itself is displayed
    (``[glossary:term]``).

    ``request`` supplies ``redis_conn`` and ``mission.name`` for the glossary
    lookup. If it is falsy, or the lookup raises ``ValueError``, no definition
    is attached and the result is a plain link to the glossary page.

    Returns a unicode HTML string:

    * the display text wrapped in ``<abbr>``/``<i>`` with the description as
      its ``title`` when a description is available;
    * additionally wrapped in a link to the glossary page anchor when the
      entry has an extended description or could not be looked up.
    """
    term = match.group(1)

    # Defaults describe the "no definition available" case: no tooltip, but
    # still link to the glossary page so the reader can look the term up.
    gitem = None
    title = ""
    more_information = True
    tag = 'i'

    if request:
        try:
            gitem = Glossary(request.redis_conn, request.mission.name, term)
        except ValueError:
            # Lookup failed; keep the defaults set above.
            gitem = None
        else:
            title = gitem.description
            more_information = bool(gitem.extended_description)
            tag = 'abbr' if gitem.type == 'abbreviation' else 'i'

    try:
        # full syntax [glossary:term|display]
        display = match.group(2)
    except IndexError:
        # abbreviated syntax [glossary:term]: the pattern has no second group
        display = None
    if display is None:
        # Either there is no second group, or it is optional and did not
        # participate in the match (group() returns None in that case).
        display = term

    if title:
        display = u"<%(tag)s class='jargon' title='%(title)s'>%(text)s</%(tag)s>" % {
            "tag": tag,
            # The attribute is single-quoted, so a raw apostrophe in the
            # description would terminate it early.
            "title": title.replace("'", "&#39;"),
            "text": display,
        }

    if not more_information:
        return display

    # Prefer the stored slug; fall back to slugifying the raw term when the
    # glossary entry could not be loaded.
    anchor = gitem.slug if gitem is not None else slugify(term)
    return u"<a href='%s#%s'>%s</a>" % (
        reverse("glossary"),
        anchor,
        display,
    )
Exemple #3
0
    def index(self):
        """Write every chunk produced by ``self.parser`` into Redis.

        Chunks must arrive in strictly increasing timestamp order. For each
        chunk that falls inside a known act this stores, keyed by a log line
        id of the form ``<transcript_name>:<timestamp>``:

        * an info hash (offset, page, act, UTC time, optional transcript
          page, language, note, key scene and previous/next links);
        * the spoken lines and any images;
        * membership in per-speaker, per-page, per-transcript-page and
          per-label collections, plus the mission and transcript sorted sets;
        * an entry in the search index (weighted higher when labelled);
        * ``times_mentioned`` increments for glossary terms found in the text.

        Chunks whose timestamp is not covered by any act are reported and
        skipped. After the loop, the last transcript page number seen is
        recorded for this transcript unless one is already stored.

        Raises:
            Exception: if a chunk's timestamp is not greater than that of
                the previously indexed chunk.
        """
        current_labels = {}
        current_transcript_page = None
        current_page = 1
        current_page_lines = 0
        current_lang = None
        last_act = None
        previous_log_line_id = None
        previous_timestamp = None
        launch_time = int(
            self.redis_conn.hget("mission:%s" % self.mission_name,
                                 "utc_launch_time"))
        acts = list(Act.Query(self.redis_conn, self.mission_name))
        key_scenes = list(KeyScene.Query(self.redis_conn, self.mission_name))
        # Lower-cased identifier -> glossary item, for case-insensitive lookup
        glossary_items = dict(
            (item.identifier.lower(), item)
            for item in Glossary.Query(self.redis_conn, self.mission_name)
        )
        for chunk in self.parser.get_chunks():
            timestamp = chunk['timestamp']
            log_line_id = "%s:%i" % (self.transcript_name, timestamp)
            # The first indexed chunk has nothing to be compared against.
            if (previous_timestamp is not None
                    and timestamp <= previous_timestamp):
                raise Exception("%s should be after %s" % (
                    seconds_to_timestamp(timestamp),
                    seconds_to_timestamp(previous_timestamp)))
            # See if there's transcript page info, and update it if so
            if chunk['meta'].get('_page', 0):
                current_transcript_page = int(chunk["meta"]['_page'])
            if chunk['meta'].get('_lang', None):
                current_lang = chunk['meta']['_lang']
            if current_transcript_page:
                self.redis_conn.set("log_line:%s:page" % log_line_id,
                                    current_transcript_page)
            # Look up the act; the for/else fires only when no act matched
            for act in acts:
                if act.includes(timestamp):
                    break
            else:
                print("Error: No act for timestamp %s" % seconds_to_timestamp(
                    timestamp))
                continue
            # If we've filled up the current page, or crossed into a new act,
            # go to a new page
            if current_page_lines >= self.LINES_PER_PAGE or (
                    last_act is not None and last_act != act):
                current_page += 1
                current_page_lines = 0
            last_act = act
            # First, create a record with some useful information
            info_key = "log_line:%s:info" % log_line_id
            info_record = {
                "offset": chunk['offset'],
                "page": current_page,
                "act": act.number,
                "utc_time": launch_time + timestamp,
            }
            if current_transcript_page:
                info_record["transcript_page"] = current_transcript_page
            if current_lang:
                info_record["lang"] = current_lang
            # And an editorial note if present
            if '_note' in chunk['meta']:
                info_record["note"] = chunk['meta']['_note']

            self.redis_conn.hmset(
                info_key,
                info_record,
            )
            # Look up the key scene (first match wins)
            for key_scene in key_scenes:
                if key_scene.includes(timestamp):
                    self.redis_conn.hset(info_key, 'key_scene',
                                         key_scene.number)
                    break
            # Create the doubly-linked list structure
            if previous_log_line_id:
                self.redis_conn.hset(
                    info_key,
                    "previous",
                    previous_log_line_id,
                )
                self.redis_conn.hset(
                    "log_line:%s:info" % previous_log_line_id,
                    "next",
                    log_line_id,
                )
            previous_log_line_id = log_line_id
            previous_timestamp = timestamp
            # Also store the text
            text_parts = []
            for line in chunk['lines']:
                self.redis_conn.rpush(
                    "log_line:%s:lines" % log_line_id,
                    u"%(speaker)s: %(text)s" % line,
                )
                text_parts.append("%s %s" % (line['speaker'], line['text']))
            # Join with a space so the last word of one line does not fuse
            # with the speaker of the next when the text is split into words.
            text = u" ".join(text_parts)
            # Store any images
            for i, image in enumerate(chunk['meta'].get("_images", [])):
                # Make the image id
                image_id = "%s:%s" % (log_line_id, i)
                # Push it onto the images list
                self.redis_conn.rpush(
                    "log_line:%s:images" % log_line_id,
                    image_id,
                )
                # Store the image data
                self.redis_conn.hmset(
                    "image:%s" % image_id,
                    image,
                )
            # Add that logline ID for the people involved
            speakers = set(line['speaker'] for line in chunk['lines'])
            for speaker in speakers:
                self.redis_conn.sadd("speaker:%s" % speaker, log_line_id)
            # Add it to the index for this page
            self.redis_conn.rpush(
                "page:%s:%i" % (self.transcript_name, current_page),
                log_line_id)
            # Add it to the index for this transcript page
            # NOTE(review): when no '_page' has been seen yet this key ends
            # in "None" - confirm that is intended.
            self.redis_conn.rpush(
                "transcript_page:%s:%s" %
                (self.transcript_name, current_transcript_page), log_line_id)
            # Add it into the transcript and everything sets
            self.redis_conn.zadd("log_lines:%s" % self.mission_name,
                                 log_line_id, chunk['timestamp'])
            self.redis_conn.zadd("transcript:%s" % self.transcript_name,
                                 log_line_id, chunk['timestamp'])
            # Read the new labels into current_labels
            has_labels = False
            if '_labels' in chunk['meta']:
                for label, endpoint in chunk['meta']['_labels'].items():
                    if endpoint is not None and label not in current_labels:
                        # New ranged label: remember where it ends
                        current_labels[label] = endpoint
                    elif label in current_labels:
                        # Already active: only ever extend the range. An
                        # endpoint of None leaves the stored endpoint alone.
                        if endpoint is not None:
                            current_labels[label] = max(
                                current_labels[label], endpoint)
                    elif endpoint is None:
                        # Label with no endpoint applies to this line only
                        self.redis_conn.sadd("label:%s" % label, log_line_id)
                        has_labels = True
            # Expire any old labels (iterate over a copy: we delete as we go)
            for label, endpoint in list(current_labels.items()):
                if endpoint < chunk['timestamp']:
                    del current_labels[label]
            # Apply any surviving labels
            for label in current_labels:
                self.redis_conn.sadd("label:%s" % label, log_line_id)
                has_labels = True
            # And add this logline to search index
            if has_labels:
                print("weight = 3 for %s" % log_line_id)
                weight = 3.0  # magic!
            else:
                weight = 1.0
            self.add_to_search_index(
                mission=self.mission_name,
                id=log_line_id,
                chunk=chunk,
                weight=weight,
                timestamp=timestamp,
            )
            # For any mentioned glossary terms, add to them.
            for word in text.split():
                word = word.strip(",;-:'\"").lower()
                if word in glossary_items:
                    glossary_item = glossary_items[word]
                    self.redis_conn.hincrby(
                        "glossary:%s" % glossary_item.id,
                        "times_mentioned",
                        1,
                    )
            # Increment the number of log lines we've done
            current_page_lines += len(chunk['lines'])
        # Record the last transcript page seen, unless a value already exists
        pages_set = self.redis_conn.hexists("pages:%s" % self.mission_name,
                                            self.transcript_name)
        if not pages_set and current_transcript_page:
            print("%s original pages: %d" % (self.transcript_name,
                                             current_transcript_page))
            self.redis_conn.hset("pages:%s" % self.mission_name,
                                 self.transcript_name, current_transcript_page)
Exemple #4
0
 def _query(self):
     """Return a ``Glossary.Query`` for the current request's mission.

     The query is built from the request's Redis connection and the name of
     the mission attached to the request.
     """
     connection = self.request.redis_conn
     mission_name = self.request.mission.name
     return Glossary.Query(connection, mission_name)