Example #1
0
def main(argv):
    """Walk the wikipedia articles and record the relations added for them.

    Loads the known entity ids, stores the highest one in the module-level
    ``maxId``, and then processes every line of every ``.txt`` file under
    ``../wikipediaArticles/``: the line is tokenized with spaCy, each token
    is printed prefixed with its index, and the token texts are handed to
    ``addRelations``. After each line the accumulated relations are appended
    to ``newRelations.txt``, one JSON document per line.

    Args:
        argv: Command-line arguments; not read by this function.
    """
    global maxId

    # Named entity ids: <name, id>
    ids = loadDataSet()
    maxId = getMaxId(ids)
    newRelations = []
    nlp = spacy.load("en_core_web_sm")

    for article in os.scandir('../wikipediaArticles/'):
        # only regular .txt files are articles
        if not (article.path.endswith('.txt') and article.is_file()):
            continue

        for text in loadFile(article.path):
            doc = nlp(text)

            # echo the tokens as "<index>_<token>" on a single row
            for position, tok in enumerate(doc):
                print(position, tok, sep='_', end=' ')

            print('\n')
            print(doc)
            addRelations([tok.text for tok in doc], ids, newRelations)

            # NOTE(review): newRelations is never cleared in this function,
            # so relations from earlier lines appear to be appended again
            # after every line - confirm whether addRelations accounts for
            # that.
            for relation in newRelations:
                with open('newRelations.txt', 'a') as out:
                    out.write(json.dumps(jsons.dump(relation)) + '\n')
Example #2
0
    def getFontPostscriptName(self, filename):
        """Return the PostScript name of the TrueType font in *filename*.

        An empty string is returned when util.loadFile yields None or when
        the data is not a valid TrueType font (an error dialog is shown in
        the latter case). A font that does not allow embedding only
        triggers a warning dialog; its name is still returned.
        """
        # Cap the read at 10 MB: scripts passed around may reference "fonts"
        # with filenames like "/dev/zero", which would otherwise be a
        # denial-of-service vector. No real font is expected to be that big.
        sizeLimit = 10 * 1024 * 1024
        fontProgram = util.loadFile(filename, cfgFrame, sizeLimit)
        if fontProgram is None:
            return ""

        font = truetype.Font(fontProgram)

        if not font.isOK():
            invalidMsg = (
                "File '%s'\n" "does not appear to be a valid TrueType font." % filename
            )
            wx.MessageBox(invalidMsg, "Error", wx.OK, cfgFrame)
            return ""

        if not font.allowsEmbedding():
            # warn only; the name is still returned below
            licenseMsg = (
                "Font '%s'\n"
                "does not allow embedding in its license terms.\n"
                "You may encounter problems using this font"
                " embedded." % filename
            )
            wx.MessageBox(licenseMsg, "Error", wx.OK, cfgFrame)

        return font.getPostscriptName()
Example #3
0
def importAstx(fileName, frame):
    """Load an Adobe Story (.astx) file and parse it as XML.

    Args:
        fileName: Path of the file to import.
        frame: Parent window passed to util.loadFile and to the error
            dialogs.

    Returns:
        None if the file could not be loaded, is empty, or is not
        well-formed XML (a message box is shown for the last two cases).
    """
    # astx files are xml files. The textlines can be found under
    # AdobeStory/document/stream/section/scene/paragraph which contain
    # one or more textRun/break elements, to be joined. The paragraph
    # attribute "element" gives us the element style.

    data = util.loadFile(fileName, frame, 5000000)

    # identity comparison: None is a singleton
    if data is None:
        return None

    if not data:
        wx.MessageBox("File is empty.", "Error", wx.OK, frame)

        return None

    # paragraph "element" attribute value -> screenplay line type
    elemMap = {
        "Action": screenplay.ACTION,
        "Character": screenplay.CHARACTER,
        "Dialog": screenplay.DIALOGUE,
        "Parenthetical": screenplay.PAREN,
        "SceneHeading": screenplay.SCENE,
        "Shot": screenplay.SHOT,
        "Transition": screenplay.TRANSITION,
    }

    try:
        root = etree.XML(data)
    except etree.XMLSyntaxError as e:
        # "as" form is valid on Python 2.6+ and Python 3
        wx.MessageBox("Error parsing file: %s" % e, "Error", wx.OK, frame)
        return None
Example #4
0
def importAstx(fileName, frame):
    """Read an Adobe Story (.astx) document and parse its XML.

    Args:
        fileName: Path of the .astx file.
        frame: Parent window handed to util.loadFile and used for the
            error dialogs.

    Returns:
        None when loading fails, the file is empty, or the content is not
        well-formed XML (the last two cases also show a message box).
    """
    # astx files are xml files. The textlines can be found under
    # AdobeStory/document/stream/section/scene/paragraph which contain
    # one or more textRun/break elements, to be joined. The paragraph
    # attribute "element" gives us the element style.

    data = util.loadFile(fileName, frame, 5000000)

    # None is a singleton, so test identity rather than equality
    if data is None:
        return None

    if not data:
        wx.MessageBox("File is empty.", "Error", wx.OK, frame)

        return None

    # paragraph "element" attribute value -> screenplay line type
    elemMap = {
        "Action": screenplay.ACTION,
        "Character": screenplay.CHARACTER,
        "Dialog": screenplay.DIALOGUE,
        "Parenthetical": screenplay.PAREN,
        "SceneHeading": screenplay.SCENE,
        "Shot": screenplay.SHOT,
        "Transition": screenplay.TRANSITION,
    }

    try:
        root = etree.XML(data)
    except etree.XMLSyntaxError as e:
        # the "as" spelling works on Python 2.6+ as well as Python 3
        wx.MessageBox("Error parsing file: %s" % e, "Error", wx.OK, frame)
        return None
Example #5
0
def baselinermse(namesearch, directories, isArm=True):
    """Print baseline RMSE statistics for the matching files and return the mean.

    For every file found by util.getFilenames the predicted and measured
    forces are loaded, forces are zeroed wherever the corresponding
    activation is negative (predicted forces below zero are zeroed too),
    and three per-file RMSE values are collected: prediction vs. measured,
    an all-zero estimate vs. measured, and the measured mean vs. measured.

    Args:
        namesearch: Forwarded to util.getFilenames as ``namesearch``.
        directories: Forwarded to util.getFilenames.
        isArm: Selects the "Gown" (True) or "Shorts" (False) header text.

    Returns:
        The mean of the per-file prediction RMSE values.
    """
    filenames = list(set(util.getFilenames(directories, parentDir=util.comparisonDir, namesearch=namesearch)))
    rmse = []
    rmseZero = []
    rmseMean = []

    for filename in filenames:
        data = util.loadFile(filename)
        predForces = np.array(data['predForces'])
        predActivations = np.array(data['predActivations'])
        yForces = np.array(data['yForces'])
        yActivations = np.array(data['yActivations'])

        # zero the forces wherever the activation is negative; predicted
        # forces below zero are zeroed as well
        predForces[predActivations < 0] = 0
        predForces[predForces < 0] = 0
        yForces[yActivations < 0] = 0

        rmse.append(np.sqrt(np.mean(np.square(yForces - predForces))))
        rmseZero.append(np.sqrt(np.mean(np.square(yForces))))
        rmseMean.append(np.sqrt(np.mean(np.square(yForces - np.mean(yForces)))))

    # Each message is formatted into a single string so the print call
    # produces the same text under Python 2 and Python 3.
    header = 'Baseline Gown Simulation' if isArm else 'Baseline Shorts Simulation'
    print('%s %s %s' % ('-'*5, header, '-'*5))
    print('RMSE across all %d sequences: %s' % (len(filenames), np.mean(rmse)))
    print('RMSE for estimation of zero: %s' % (np.mean(rmseZero),))
    print('RMSE for estimation of mean of sequence: %s' % (np.mean(rmseMean),))
    print('RMSE standard deviation: %s' % (np.std(rmse),))
    return np.mean(rmse)
Example #6
0
def importCeltx(fileName, frame):
    """Load a Celtx file, extract its script document and parse it.

    Args:
        fileName: Path of the Celtx file.
        frame: Parent window passed to util.loadFile and to the error
            dialogs.

    Returns:
        None if the file could not be loaded, is empty, is not a readable
        zip archive, contains no "script*" member, the member is empty, or
        its content cannot be parsed (a message box explains each case
        except the failed load).
    """
    # Celtx files are zipfiles, and the script content is within a file
    # called "script-xxx.html", where xxx can be random.

    # the 5 MB limit is arbitrary, we just want to avoid getting a
    # MemoryError exception for /dev/zero etc.
    data = util.loadFile(fileName, frame, 5000000)

    if data is None:
        return None

    if not data:
        wx.MessageBox("File is empty.", "Error", wx.OK, frame)

        return None

    buf = StringIO.StringIO(data)

    # Exception rather than a bare except, so KeyboardInterrupt and
    # SystemExit are not swallowed.
    try:
        z = zipfile.ZipFile(buf)
    except Exception:
        wx.MessageBox("File is not a valid Celtx script file.", "Error", wx.OK,
                      frame)
        return None

    # try/finally closes the archive on every path, including the early
    # return and a failing read.
    try:
        scripts = [s for s in z.namelist() if s.startswith("script")]

        if not scripts:
            wx.MessageBox("Unable to find script in this Celtx file.", "Error",
                          wx.OK, frame)
            return None

        content = z.open(scripts[0]).read()
    finally:
        z.close()

    if not content:
        wx.MessageBox("Script seems to be empty.", "Error", wx.OK, frame)
        return None

    # Celtx element name -> screenplay line type
    elemMap = {
        "action": screenplay.ACTION,
        "character": screenplay.CHARACTER,
        "dialog": screenplay.DIALOGUE,
        "parenthetical": screenplay.PAREN,
        "sceneheading": screenplay.SCENE,
        "shot": screenplay.SHOT,
        "transition": screenplay.TRANSITION,
        "act": screenplay.ACTBREAK,
    }

    try:
        parser = etree.HTMLParser()
        root = etree.XML(content, parser)
    except etree.XMLSyntaxError as e:
        wx.MessageBox("Error parsing file: %s" % e, "Error", wx.OK, frame)
        return None
Example #7
0
def importCeltx(fileName, frame):
    """Open a Celtx archive, pull out the script document and parse it.

    Args:
        fileName: Path of the Celtx file.
        frame: Parent window handed to util.loadFile and used for the
            error dialogs.

    Returns:
        None when the file cannot be loaded, is empty, is not a readable
        zip archive, has no member starting with "script", that member is
        empty, or its content fails to parse. Every case but the failed
        load shows a message box.
    """
    # Celtx files are zipfiles, and the script content is within a file
    # called "script-xxx.html", where xxx can be random.

    # the 5 MB limit is arbitrary, we just want to avoid getting a
    # MemoryError exception for /dev/zero etc.
    data = util.loadFile(fileName, frame, 5000000)

    if data is None:
        return None

    if not data:
        wx.MessageBox("File is empty.", "Error", wx.OK, frame)

        return None

    buf = StringIO.StringIO(data)

    # catch Exception instead of everything: a bare except would also
    # swallow KeyboardInterrupt / SystemExit
    try:
        z = zipfile.ZipFile(buf)
    except Exception:
        wx.MessageBox("File is not a valid Celtx script file.", "Error", wx.OK, frame)
        return None

    # the finally clause guarantees the archive is closed even on the
    # early return or when reading the member raises
    try:
        scripts = [s for s in z.namelist() if s.startswith("script")]

        if not scripts:
            wx.MessageBox("Unable to find script in this Celtx file.", "Error", wx.OK, frame)
            return None

        content = z.open(scripts[0]).read()
    finally:
        z.close()

    if not content:
        wx.MessageBox("Script seems to be empty.", "Error", wx.OK, frame)
        return None

    # Celtx element name -> screenplay line type
    elemMap = {
        "action": screenplay.ACTION,
        "character": screenplay.CHARACTER,
        "dialog": screenplay.DIALOGUE,
        "parenthetical": screenplay.PAREN,
        "sceneheading": screenplay.SCENE,
        "shot": screenplay.SHOT,
        "transition": screenplay.TRANSITION,
        "act": screenplay.ACTBREAK,
    }

    try:
        parser = etree.HTMLParser()
        root = etree.XML(content, parser)
    except etree.XMLSyntaxError as e:
        wx.MessageBox("Error parsing file: %s" % e, "Error", wx.OK, frame)
        return None
Example #8
0
def npz_load(inp, name):
    """Return the entry *name* of the file *inp*, printing how long it took.

    The file is opened with util.loadFile; only the ``[name]`` lookup is
    timed. The elapsed value is reported as milliseconds, which presumes
    util.now() returns milliseconds - TODO confirm.
    """
    archive = util.loadFile(inp)

    began = util.now()
    print("[Loading %s:%s...]" % (inp, name), )
    # make the progress message visible before the lookup runs
    sys.stdout.flush()

    entry = archive[name]
    elapsed = util.now() - began
    print("[Took %d milliseconds]" % elapsed)
    return entry
Example #9
0
def process(args):
    """Encode the input file and write the artefacts for the chosen format.

    Reads ``args.input``, encodes it with ``encoding.encode`` using
    ``args.token``, and saves the user/item id tables into the output
    directory (``args.output``, or ``./output`` when that is not set).
    Depending on ``args.format`` it additionally writes:

    * ``'deepwalk-bipartite'``: the adjacency list.
    * ``'FM'``: the relation, training and testing files; when
      ``args.sampling`` equals True the training data is first extended
      with zero samples.
    """
    f = util.loadFile(args.input)
    output_dir = args.output if args.output else './output'

    util.writeDir(output_dir)

    # encoding
    print('---encoding')
    e = encoding.encode(f, args.token)
    userID = e.get_userID()
    itemID = e.get_itemID()
    adjlist = e.get_adjlist()
    user_train, item_train, value_train = e.output4FM()

    if args.format == 'FM':
        if args.sampling == True:
            # zero sampling
            print('---Zero samping')
            zero_user, zero_item, zero_value = sampling.get_zero(sampling.zeroSampling(adjlist))
            for target, extra in ((user_train, zero_user),
                                  (item_train, zero_item),
                                  (value_train, zero_value)):
                target.extend(extra)

        # testing
        print('---Create Testing Data')
        test_user, test_item, test_value = build.build(len(userID), len(itemID))

    # save
    print('---Save')
    util.saveFile('{0}/userID'.format(output_dir), userID)
    util.saveFile('{0}/itemID'.format(output_dir), itemID)

    if args.format == 'deepwalk-bipartite':
        # deepwalk
        util.saveFile('{0}/adjlist'.format(output_dir), adjlist)
    elif args.format == 'FM':
        # FM: (path template, payload) pairs, written in this order
        fm_outputs = [
            ('{0}/rel-user', ['0 {0}:1'.format(i) for i in range(len(userID))]),
            ('{0}/rel-item', ['0 {0}:1'.format(i) for i in range(len(itemID))]),
            ('{0}/rel-user.train', user_train),
            ('{0}/rel-item.train', item_train),
            ('{0}/ans.train', value_train),
            ('{0}/rel-user.test', test_user),
            ('{0}/rel-item.test', test_item),
            ('{0}/ans.test', test_value),
        ]
        for template, payload in fm_outputs:
            util.saveFile(template.format(output_dir), payload)
Example #10
0
def importFadein(fileName, frame):
    """Load a Fade In (.fadein) file and parse its document.xml.

    Args:
        fileName: Path of the .fadein file.
        frame: Parent window passed to util.loadFile and to the error
            dialogs.

    Returns:
        None if the file could not be loaded, is empty, is not a zip
        archive containing a readable "document.xml", that member is
        empty, or it is not well-formed XML (a message box explains each
        case except the failed load).
    """
    # Fadein file is a zipped document.xml file.
    # the .xml is in open screenplay format:
    # http://sourceforge.net/projects/openscrfmt/files/latest/download

    # the 5 MB limit is arbitrary, we just want to avoid getting a
    # MemoryError exception for /dev/zero etc.
    data = util.loadFile(fileName, frame, 5000000)

    if data is None:
        return None

    if not data:
        wx.MessageBox("File is empty.", "Error", wx.OK, frame)

        return None

    buf = StringIO.StringIO(data)

    # Exception rather than a bare except, so KeyboardInterrupt and
    # SystemExit are not swallowed; the inner finally closes the archive
    # even when the member is missing or unreadable.
    try:
        z = zipfile.ZipFile(buf)
        try:
            content = z.open("document.xml").read()
        finally:
            z.close()
    except Exception:
        wx.MessageBox("File is not a valid .fadein file.", "Error", wx.OK,
                      frame)
        return None

    if not content:
        wx.MessageBox("Script seems to be empty.", "Error", wx.OK, frame)
        return None

    # element name -> screenplay line type
    elemMap = {
        "Action": screenplay.ACTION,
        "Character": screenplay.CHARACTER,
        "Dialogue": screenplay.DIALOGUE,
        "Parenthetical": screenplay.PAREN,
        "Scene Heading": screenplay.SCENE,
        "Shot": screenplay.SHOT,
        "Transition": screenplay.TRANSITION,
    }

    try:
        root = etree.XML(content)
    except etree.XMLSyntaxError as e:
        wx.MessageBox("Error parsing file: %s" % e, "Error", wx.OK, frame)
        return None
Example #11
0
def importFadein(fileName, frame):
    """Read a Fade In (.fadein) archive and parse the XML document inside.

    Args:
        fileName: Path of the .fadein file.
        frame: Parent window handed to util.loadFile and used for the
            error dialogs.

    Returns:
        None when the file cannot be loaded, is empty, is not a zip
        archive with a readable "document.xml", that member is empty, or
        it is not well-formed XML. Every case but the failed load shows a
        message box.
    """
    # Fadein file is a zipped document.xml file.
    # the .xml is in open screenplay format:
    # http://sourceforge.net/projects/openscrfmt/files/latest/download

    # the 5 MB limit is arbitrary, we just want to avoid getting a
    # MemoryError exception for /dev/zero etc.
    data = util.loadFile(fileName, frame, 5000000)

    if data is None:
        return None

    if not data:
        wx.MessageBox("File is empty.", "Error", wx.OK, frame)

        return None

    buf = StringIO.StringIO(data)

    # catch Exception instead of everything (a bare except would also
    # swallow KeyboardInterrupt / SystemExit); the inner finally makes
    # sure the archive is closed when opening or reading the member fails
    try:
        z = zipfile.ZipFile(buf)
        try:
            content = z.open("document.xml").read()
        finally:
            z.close()
    except Exception:
        wx.MessageBox("File is not a valid .fadein file.", "Error", wx.OK, frame)
        return None

    if not content:
        wx.MessageBox("Script seems to be empty.", "Error", wx.OK, frame)
        return None

    # element name -> screenplay line type
    elemMap = {
        "Action": screenplay.ACTION,
        "Character": screenplay.CHARACTER,
        "Dialogue": screenplay.DIALOGUE,
        "Parenthetical": screenplay.PAREN,
        "Scene Heading": screenplay.SCENE,
        "Shot": screenplay.SHOT,
        "Transition": screenplay.TRANSITION,
    }

    try:
        root = etree.XML(content)
    except etree.XMLSyntaxError as e:
        wx.MessageBox("Error parsing file: %s" % e, "Error", wx.OK, frame)
        return None
Example #12
0
def loadDataSet():
    """Collect an entity name -> id mapping from the dataset files.

    Every ``.txt`` file in the scanned directory is read line by line; each
    line is parsed as JSON and the ``name``/``id`` pairs found under its
    ``'h'`` and ``'t'`` keys are recorded. The first id seen for a name
    wins.

    Returns:
        dict mapping entity name to entity id.
    """
    print('Loading dataset...')
    ids = {}
    # debug: change this back to the wiki80 filepath
    for candidate in os.scandir('./'):
        if not (candidate.path.endswith('.txt') and candidate.is_file()):
            continue

        for raw in loadFile(candidate.path):
            record = json.loads(raw)

            # register both entities of the example, keeping existing ids
            for role in ('h', 't'):
                entity = record[role]
                if entity['name'] not in ids:
                    ids[entity['name']] = entity['id']

    return ids
Example #13
0
def plotComparison(namesearch, directories, title, plottag='', isArm=True, xRotation=False, yRotation=False, zRotation=False):
    """Print and return the mean prediction RMSE per velocity or rotation.

    Each file found by util.getFilenames is grouped by a key: the last
    entry of ``data['rotateFist']`` (``isArm``) or ``data['rotateArm']``
    when any rotation flag is set, otherwise the number parsed from
    characters ``[-9:-5]`` of the file name. Forces are zeroed where the
    corresponding activation is negative (negative predicted forces are
    zeroed too) before the per-file RMSE is computed.

    Args:
        namesearch: Forwarded to util.getFilenames as ``namesearch``.
        directories: Forwarded to util.getFilenames.
        title: Not used by this function.
        plottag: Not used by this function.
        isArm: Selects the "Gown" (True) or "Shorts" (False) header text
            and which rotation entry is read.
        xRotation, yRotation, zRotation: When any is true the files are
            grouped by rotation and the keys are converted with
            np.degrees.

    Returns:
        ``(xVel, yRMSE)``: the sorted group keys and the mean RMSE of each
        group. Both are empty when no file matched.
    """
    filenames = list(set(util.getFilenames(directories, parentDir=util.comparisonDir, namesearch=namesearch)))
    rotating = xRotation or yRotation or zRotation
    rmse = dict()

    for filename in filenames:
        data = util.loadFile(filename)
        if rotating:
            velocity = data['rotateFist' if isArm else 'rotateArm'][-1]
        else:
            # the value is taken from a fixed position in the file name
            velocity = float(filename[-9:-5])

        predForces = np.array(data['predForces'])
        predActivations = np.array(data['predActivations'])
        yForces = np.array(data['yForces'])
        yActivations = np.array(data['yActivations'])

        predForces[predActivations < 0] = 0
        predForces[predForces < 0] = 0
        yForces[yActivations < 0] = 0

        rmse.setdefault(velocity, []).append(np.sqrt(np.mean(np.square(yForces - predForces))))

    # Messages are formatted into one string each so the print call gives
    # the same text under Python 2 and Python 3.
    header = 'Gown Simulation Variation' if isArm else 'Shorts Simulation Variation'
    print('%s %s %s' % ('-'*5, header, '-'*5))

    # Sorting the (unique) keys directly also copes with an empty result,
    # where unpacking zip(*sorted(...)) would raise ValueError.
    xVel = sorted(rmse)
    yRMSE = [np.mean(rmse[vel]) for vel in xVel]
    if rotating:
        xVel = np.degrees(xVel)
        print('RMSE %s rotations: %s' % ('x' if xRotation else 'y' if yRotation else 'z', xVel))
    else:
        print('RMSE velocities: %s' % (xVel,))
    print('RMSE: %s' % (yRMSE,))

    return xVel, yRMSE
Example #14
0
    def loadQuotes(parent):
        """Parse resources/quotes.txt and store the result in SplashWindow.quotes.

        File format as read here: lines starting with "#" and blank lines
        are skipped; a line starting with two spaces is the source of a
        quote and finishes it; any other line is one text line of the
        current quote (at most three per quote). Nothing is stored when
        util.loadFile returns None. Any error is reported in a message box
        instead of being raised.

        Args:
            parent: Parent window passed to util.loadFile and to the error
                dialog.
        """
        try:
            data = util.loadFile(misc.getFullPath("resources/quotes.txt"),
                                 parent)
            if data is None:
                return

            data = data.decode("utf-8")

            quotes = []

            # lines saved for current quote being processed
            tmp = []

            # lineNo is 1-based, for the error messages
            for lineNo, line in enumerate(data.splitlines(), 1):
                if line.startswith(u"#") or not line.strip():
                    continue

                if line.startswith(u"  "):
                    if not tmp:
                        raise Exception(
                            "No lines defined for quote at line %d" % lineNo)

                    if len(tmp) > 3:
                        raise Exception(
                            "Too many lines defined for quote at line %d" %
                            lineNo)

                    quotes.append(Quote(line.strip(), tmp))
                    tmp = []
                else:
                    tmp.append(line.strip())

            if tmp:
                raise Exception("Last quote does not have source")

            SplashWindow.quotes = quotes

        except Exception as e:
            # "as" form is valid on Python 2.6+ and Python 3
            wx.MessageBox("Error loading quotes: %s" % str(e), "Error", wx.OK,
                          parent)
Example #15
0
    def loadQuotes(parent):
        """Read resources/quotes.txt into SplashWindow.quotes.

        The file is read as follows: "#" lines and blank lines are
        ignored; a line that starts with two spaces names the source of
        the quote collected so far and completes it; every other line adds
        one text line to the current quote (no more than three). If
        util.loadFile returns None nothing is stored. Errors are shown in
        a message box rather than propagated.

        Args:
            parent: Parent window handed to util.loadFile and used for the
                error dialog.
        """
        try:
            data = util.loadFile(misc.getFullPath("resources/quotes.txt"), parent)
            if data is None:
                return

            data = data.decode("utf-8")

            quotes = []

            # lines saved for current quote being processed
            tmp = []

            # lineNo counts from 1 so it can go straight into the messages
            for lineNo, line in enumerate(data.splitlines(), 1):
                if line.startswith(u"#") or not line.strip():
                    continue

                if line.startswith(u"  "):
                    if not tmp:
                        raise Exception("No lines defined for quote at line %d" % lineNo)

                    if len(tmp) > 3:
                        raise Exception("Too many lines defined for quote at line %d" % lineNo)

                    quotes.append(Quote(line.strip(), tmp))
                    tmp = []
                else:
                    tmp.append(line.strip())

            if tmp:
                raise Exception("Last quote does not have source")

            SplashWindow.quotes = quotes

        except Exception as e:
            # the "as" spelling works on Python 2.6+ as well as Python 3
            wx.MessageBox("Error loading quotes: %s" % str(e), "Error", wx.OK, parent)
Example #16
0
def multiBatch(directories, seqCount=0):
    '''
    Generator that merges mini-batch files into larger (X, Y) array pairs.

    Mini-batch files found under the given directories are loaded one after
    another and their 'X' and 'Y' entries are accumulated. With seqCount > 0
    a pair is yielded as soon as at least seqCount sequences have been
    gathered, and accumulation starts over. With seqCount = 0 everything is
    yielded at once after the last file. Leftover data is yielded at the end.
    '''
    features, labels = [], []
    for path in util.getFilenames(directories, parentDir=util.batchDir):
        batch = util.loadFile(path)
        features.extend(batch['X'].tolist())
        labels.extend(batch['Y'].tolist())

        if seqCount > 0 and len(features) >= seqCount:
            # drop the reference to the loaded file before handing out
            # this collection
            batch = None
            yield np.array(features), np.array(labels)
            features, labels = [], []

    if features:
        # whatever did not fill a complete collection
        yield np.array(features), np.array(labels)
Example #17
0
def importFDX(fileName, frame):
    """Import a Final Draft (.fdx) XML file as a list of screenplay lines.

    Every "Paragraph" below "Content" becomes one screenplay.Line whose
    text is the concatenation of its "Text" children. Paragraphs of type
    "General" are skipped and unknown types are imported as Action.

    Args:
        fileName: Path of the .fdx file.
        frame: Parent window passed to util.loadFile and to the error
            dialogs.

    Returns:
        A list of screenplay.Line objects, or None if the file could not
        be loaded, is empty, is not well-formed XML, or yields no lines (a
        message box explains each case except the failed load).
    """
    # paragraph "Type" attribute value -> screenplay line type
    elemMap = {
        "Action": screenplay.ACTION,
        "Character": screenplay.CHARACTER,
        "Dialogue": screenplay.DIALOGUE,
        "Parenthetical": screenplay.PAREN,
        "Scene Heading": screenplay.SCENE,
        "Shot": screenplay.SHOT,
        "Transition": screenplay.TRANSITION,
    }

    # the 5 MB limit is arbitrary, we just want to avoid getting a
    # MemoryError exception for /dev/zero etc.
    data = util.loadFile(fileName, frame, 5000000)

    if data is None:
        return None

    if not data:
        wx.MessageBox("File is empty.", "Error", wx.OK, frame)

        return None

    try:
        root = etree.XML(data)
        lines = []

        for para in root.xpath("Content//Paragraph"):
            et = para.get("Type")

            # "General" has embedded Dual Dialogue paragraphs inside it;
            # nothing to do for the General element itself.
            if et == "General":
                continue

            # all unknown linetypes are converted to Action
            lt = elemMap.get(et, screenplay.ACTION)

            # text.text is None for paragraphs with no text, so only the
            # non-empty pieces are joined
            s = u"".join(text.text for text in para.xpath("Text") if text.text)

            # FD uses some fancy unicode apostrophe, replace it with a
            # normal one
            s = s.replace(u"\u2019", "'")

            s = util.toInputStr(util.toLatin1(s))

            lines.append(screenplay.Line(screenplay.LB_LAST, lt, s))

        if not lines:
            wx.MessageBox("The file contains no importable lines", "Error", wx.OK, frame)
            return None

        return lines

    except etree.XMLSyntaxError as e:
        # "as" form is valid on Python 2.6+ and Python 3
        wx.MessageBox("Error parsing file: %s" % e, "Error", wx.OK, frame)
        return None
Example #18
0
def importTextFile(fileName, frame):
    """Import a plain-text screenplay by guessing line types from indentation.

    First pass: every non-empty line is grouped by its number of leading
    spaces (tabs expanded to 4 columns) and per-indent statistics are
    collected. setType/findIndent then assign a line type to each indent
    level, and the result is shown in an ImportDlg. Second pass: the lines
    are converted to screenplay.Line objects using the type of their
    indent level.

    Args:
        fileName: Path of the text file.
        frame: Parent window passed to util.loadFile, the error dialogs
            and ImportDlg.

    Returns:
        A list of screenplay.Line objects (never empty), or None if the
        file could not be loaded, is empty, contains only empty lines, or
        the dialog was not confirmed with OK.
    """

    # the 1 MB limit is arbitrary, we just want to avoid getting a
    # MemoryError exception for /dev/zero etc.
    data = util.loadFile(fileName, frame, 1000000)

    if data == None:
        return None

    if len(data) == 0:
        wx.MessageBox("File is empty.", "Error", wx.OK, frame)

        return None

    data = util.fixNL(data)
    lines = data.split("\n")

    tabWidth = 4

    # key = indent level, value = Indent
    indDict = {}

    # first pass: normalize each line in place and gather statistics for
    # its indent level
    for i in range(len(lines)):
        s = util.toInputStr(lines[i].rstrip().expandtabs(tabWidth))

        # don't count empty lines towards indentation statistics
        if s.strip() == "":
            lines[i] = ""

            continue

        cnt = util.countInitial(s, " ")

        ind = indDict.get(cnt)
        if not ind:
            ind = Indent(cnt)
            indDict[cnt] = ind

        tmp = s.upper()

        # count lines that look like scene headings
        if util.multiFind(tmp, ["EXT.", "INT."]):
            ind.sceneStart += 1

        # count lines that look like transitions
        if util.multiFind(tmp, ["CUT TO:", "DISSOLVE TO:"]):
            ind.trans += 1

        # count indented lines that are entirely wrapped in parentheses
        if re.match(r"^ +\(.*\)$", tmp):
            ind.paren += 1

        ind.lines.append(s.lstrip())
        lines[i] = s

    if len(indDict) == 0:
        wx.MessageBox("File contains only empty lines.", "Error", wx.OK, frame)

        return None

    # scene/action indent
    setType(SCENE_ACTION, indDict, lambda v: v.sceneStart)

    # indent with most lines is dialogue in non-pure-action scripts
    setType(screenplay.DIALOGUE, indDict, lambda v: len(v.lines))

    # remaining indent with lines is character most likely
    setType(screenplay.CHARACTER, indDict, lambda v: len(v.lines))

    # transitions
    setType(screenplay.TRANSITION, indDict, lambda v: v.trans)

    # parentheticals
    setType(screenplay.PAREN, indDict, lambda v: v.paren)

    # some text files have this type of parens:
    #
    #        JOE
    #      (smiling and
    #       hopping along)
    #
    # this handles them.
    parenIndent = findIndent(indDict, lambda v: v.lt == screenplay.PAREN)
    if parenIndent != -1:
        # look for a still-unassigned indent exactly one column deeper
        # than the parenthetical indent
        paren2Indent = findIndent(
            indDict, lambda v, var: (v.lt == -1) and (v.indent == var),
            parenIndent + 1)

        if paren2Indent != -1:
            indDict[paren2Indent].lt = screenplay.PAREN

    # set line type to ACTION for any indents not recognized
    for v in indDict.itervalues():
        if v.lt == -1:
            v.lt = screenplay.ACTION

    # NOTE(review): indDict[cnt].lt is read again after the dialog, so the
    # dialog presumably lets the user change the guessed types - confirm.
    dlg = ImportDlg(frame, indDict.values())

    if dlg.ShowModal() != wx.ID_OK:
        dlg.Destroy()

        return None

    dlg.Destroy()

    ret = []

    # second pass: build the result using the line type of each indent
    for i in range(len(lines)):
        s = lines[i]
        cnt = util.countInitial(s, " ")
        s = s.lstrip()
        sUp = s.upper()

        if s:
            lt = indDict[cnt].lt

            if lt == IGNORE:
                continue

            # a combined scene/action indent is resolved per line by the
            # (case-sensitive) prefix
            if lt == SCENE_ACTION:
                if s.startswith("EXT.") or s.startswith("INT."):
                    lt = screenplay.SCENE
                else:
                    lt = screenplay.ACTION

            # a change of line type ends the previous element
            if ret and (ret[-1].lt != lt):
                ret[-1].lb = screenplay.LB_LAST

            if lt == screenplay.CHARACTER:
                # drop a trailing "(CONT'D)"; note that the remaining text
                # is taken from the upper-cased copy
                if sUp.endswith("(CONT'D)"):
                    s = sUp[:-8].rstrip()

            elif lt == screenplay.PAREN:
                # "(continuing)" parentheticals are discarded
                if s == "(continuing)":
                    s = ""

            if s:
                line = screenplay.Line(screenplay.LB_SPACE, lt, s)
                ret.append(line)

        # an empty line ends the current element
        elif ret:
            ret[-1].lb = screenplay.LB_LAST

    # always return at least one line
    if len(ret) == 0:
        ret.append(screenplay.Line(screenplay.LB_LAST, screenplay.ACTION))

    # make sure the last line ends an element
    ret[-1].lb = screenplay.LB_LAST

    return ret
Example #19
0
def importFountain(fileName, frame):
    """Import a Fountain-formatted text file as a list of screenplay lines.

    After loading the file the user is asked for three import options
    (import titles as action lines, remove unsupported formatting markup,
    import section/synopsis as notes). The text is then pre-processed
    (newlines fixed, boneyard comments removed, title lines handled), a
    first pass assigns a line type to every line, and a second pass drops
    unneeded empty lines and fixes the line breaks.

    Args:
        fileName: Path of the Fountain file.
        frame: Parent window passed to util.loadFile and the dialogs; also
            used to look up the configured Action width when centering.

    Returns:
        A list of screenplay.Line objects, or None if the file could not
        be loaded, is empty, or the options dialog was not confirmed.
    """
    # regular expressions for fountain markdown.
    # https://github.com/vilcans/screenplain/blob/master/screenplain/richstring.py
    ire = re.compile(
        # one star
        r'\*'
        # anything but a space, then text
        r'([^\s].*?)'
        # finishing with one star
        r'\*'
        # must not be followed by star
        r'(?!\*)')
    bre = re.compile(
        # two stars
        r'\*\*'
        # must not be followed by space
        r'(?=\S)'
        # inside text
        r'(.+?[*_]*)'
        # finishing with two stars
        r'(?<=\S)\*\*')
    ure = re.compile(
        # underline
        r'_'
        # must not be followed by space
        r'(?=\S)'
        # inside text
        r'([^_]+)'
        # finishing with underline
        r'(?<=\S)_')
    # boneyard = /* ... */ comments, possibly spanning several lines
    boneyard_re = re.compile('/\\*.*?\\*/', flags=re.DOTALL)

    # random magicstring used to escape literal star '\*'
    literalstar = "Aq7RR"

    # returns s with markdown formatting removed.
    def unmarkdown(s):
        # hide escaped stars so the patterns below do not treat them as
        # markup, then restore them as plain stars
        s = s.replace("\\*", literalstar)
        for style in (bre, ire, ure):
            s = style.sub(r'\1', s)
        return s.replace(literalstar, "*")

    data = util.loadFile(fileName, frame, 1000000)

    if data == None:
        return None

    if len(data) == 0:
        wx.MessageBox("File is empty.", "Error", wx.OK, frame)
        return None

    # import options, in the order they are read back below
    inf = []
    inf.append(misc.CheckBoxItem("Import titles as action lines."))
    inf.append(misc.CheckBoxItem("Remove unsupported formatting markup."))
    inf.append(misc.CheckBoxItem("Import section/synopsis as notes."))

    dlg = misc.CheckBoxDlg(frame, "Fountain import options", inf,
                           "Import options:", False)

    if dlg.ShowModal() != wx.ID_OK:
        dlg.Destroy()
        return None

    importTitles = inf[0].selected
    removeMarkdown = inf[1].selected
    importSectSyn = inf[2].selected

    # pre-process data - fix newlines, remove boneyard.
    data = util.fixNL(data)
    data = boneyard_re.sub('', data)
    prelines = data.split("\n")
    # lines that util.toLatin1 cannot handle are decoded as UTF-8 (ignoring
    # errors) and passed through util.cleanInput instead
    for i in xrange(len(prelines)):
        try:
            util.toLatin1(prelines[i])
        except:
            prelines[i] = util.cleanInput(
                u"" + prelines[i].decode('UTF-8', "ignore"))
    lines = []

    tabWidth = 4
    # output of the first pass
    lns = []
    sceneStartsList = ("INT", "EXT", "EST", "INT./EXT", "INT/EXT", "I/E",
                       "I./E")
    # two trailing spaces mark a line that must not be interpreted
    # (see isScene and the CHARACTER branch below)
    TWOSPACE = "  "
    # set when the empty line following a skipped section/synopsis line
    # should be skipped as well
    skipone = False

    # First check if title lines are present:
    # c becomes the index of the first empty line (or len(prelines))
    c = 0
    while c < len(prelines):
        if prelines[c] != "":
            c = c + 1
        else:
            break

    # prelines[0:c] are the first bunch of lines, that could be titles.
    # Our check for title is simple:
    #   - the line does not start with 'fade'
    #   - the first line has a single ':'

    if c > 0:
        l = util.toInputStr(prelines[0].expandtabs(tabWidth).lstrip().lower())
        if not l.startswith("fade") and l.count(":") == 1:
            # these are title lines. Now do what the user requested.
            if importTitles:
                # add TWOSPACE to all the title lines.
                for i in xrange(c):
                    prelines[i] += TWOSPACE
            else:
                #remove these lines
                prelines = prelines[c + 1:]

    # convert every line to an input string; a line consisting of exactly
    # TWOSPACE is kept unchanged
    for l in prelines:
        if l != TWOSPACE:
            lines.append(util.toInputStr(l.expandtabs(tabWidth)))
        else:
            lines.append(TWOSPACE)

    linesLen = len(lines)

    # True if the last line added to lns has empty text
    def isPrevEmpty():
        if lns and lns[-1].text == "":
            return True
        return False

    # truthy if the last line added to lns has line type ltype
    def isPrevType(ltype):
        return (lns and lns[-1].lt == ltype)

    # looks ahead to check if the next line exists and is empty
    def isNextEmpty(i):
        return (i + 1 < len(lines) and lines[i + 1] == "")

    # line type of the last line added to lns, ACTION if there is none
    def getPrevType():
        if lns:
            return lns[-1].lt
        else:
            return screenplay.ACTION

    def isParen(s):
        return (s.startswith('(') and s.endswith(')'))

    # scene heading: forced with a single leading '.', or starting with one
    # of the INT/EXT/EST style prefixes; never for lines ending in TWOSPACE
    def isScene(s):
        if s.endswith(TWOSPACE):
            return False
        if s.startswith(".") and not s.startswith(".."):
            return True
        tmp = s.upper()
        if (re.match(r'^(INT|EXT|EST)[ .]', tmp)
                or re.match(r'^(INT\.?/EXT\.?)[ .]', tmp)
                or re.match(r'^I/E[ .]', tmp)):
            return True
        return False

    # transition: upper-case text ending in "TO:", or forced with a leading
    # '>' that is not closed by '<'
    def isTransition(s):
        return ((s.isupper() and s.endswith("TO:"))
                or (s.startswith(">") and not s.endswith("<")))

    # centered text is written as ">text<"
    def isCentered(s):
        return s.startswith(">") and s.endswith("<")

    # page break: three or more '=' and nothing else
    def isPageBreak(s):
        return s.startswith('===') and s.lstrip('=') == ''

    def isNote(s):
        return s.startswith("[[") and s.endswith("]]")

    def isSection(s):
        return s.startswith("#")

    # synopsis: exactly one leading '='
    def isSynopsis(s):
        return s.startswith("=") and not s.startswith("==")

    # first pass - identify linetypes
    for i in range(linesLen):
        if skipone:
            skipone = False
            continue

        s = lines[i]
        sl = s.lstrip()
        # mark as ACTION by default.
        line = screenplay.Line(screenplay.LB_FORCED, screenplay.ACTION, s)

        # Start testing lines for element type. Go in order:
        # Scene Character, Paren, Dialog, Transition, Note.

        if s == "" or isCentered(s) or isPageBreak(s):
            # do nothing - import as action.
            pass

        elif s == TWOSPACE:
            line.lt = getPrevType()

        elif isScene(s):
            line.lt = screenplay.SCENE
            # drop the forcing '.' if present
            if sl.startswith('.'):
                line.text = sl[1:]
            else:
                line.text = sl

        elif isTransition(sl) and isPrevEmpty() and isNextEmpty(i):
            line.lt = screenplay.TRANSITION
            # drop the forcing '>' if present
            if line.text.startswith('>'):
                line.text = sl[1:].lstrip()

        elif s.isupper() and isPrevEmpty() and not isNextEmpty(i):
            line.lt = screenplay.CHARACTER
            # trailing TWOSPACE turns the line back into action
            if s.endswith(TWOSPACE):
                line.lt = screenplay.ACTION

        elif isParen(sl) and (isPrevType(screenplay.CHARACTER)
                              or isPrevType(screenplay.DIALOGUE)):
            line.lt = screenplay.PAREN

        elif (isPrevType(screenplay.CHARACTER)
              or isPrevType(screenplay.DIALOGUE)
              or isPrevType(screenplay.PAREN)):
            line.lt = screenplay.DIALOGUE

        elif isNote(sl):
            line.lt = screenplay.NOTE
            line.text = sl.strip('[]')

        elif isSection(s) or isSynopsis(s):
            if not importSectSyn:
                # drop this line, and the empty line after it if any
                if isNextEmpty(i):
                    skipone = True
                continue

            line.lt = screenplay.NOTE
            line.text = sl.lstrip('=#')

        if line.text == TWOSPACE:
            pass

        elif line.lt != screenplay.ACTION:
            line.text = line.text.lstrip()

        else:
            tmp = line.text.rstrip()
            # we don't support center align, so simply add required indent.
            if isCentered(tmp):
                tmp = tmp[1:-1].strip()
                width = frame.panel.ctrl.sp.cfg.getType(
                    screenplay.ACTION).width
                if len(tmp) < width:
                    tmp = ' ' * ((width - len(tmp)) // 2) + tmp
            line.text = tmp

        if removeMarkdown:
            line.text = unmarkdown(line.text)
            # also drop a trailing '^' from character lines
            if line.lt == screenplay.CHARACTER and line.text.endswith('^'):
                line.text = line.text[:-1]

        lns.append(line)

    ret = []

    # second pass helper functions.
    def isLastLBForced():
        return ret and ret[-1].lb == screenplay.LB_FORCED

    def makeLastLBLast():
        if ret:
            ret[-1].lb = screenplay.LB_LAST

    def isRetPrevType(t):
        return ret and ret[-1].lt == t

    # second pass - remove unneeded empty lines, and fix the linebreaks.
    for ln in lns:
        if ln.text == '':
            # an empty line after a forced break only ends the previous
            # element and is itself dropped; otherwise it is kept
            if isLastLBForced():
                makeLastLBLast()
            else:
                ret.append(ln)

        elif not isRetPrevType(ln.lt):
            # a change of line type ends the previous element
            makeLastLBLast()
            ret.append(ln)

        else:
            ret.append(ln)

    # make sure the last line ends an element
    makeLastLBLast()
    return ret
Example #20
0
def importFDX(fileName, frame):
    """Import a Final Draft (.fdx) XML file as screenplay lines.

    Args:
        fileName: name of the file to import; handed to util.loadFile.
        frame: window handed to util.loadFile and used as the parent of
            the error message boxes shown by this function.

    Returns:
        A list of screenplay.Line objects, or None when util.loadFile
        returns None, the file is empty, the XML cannot be parsed, or no
        lines could be extracted.
    """
    elemMap = {
        "Action": screenplay.ACTION,
        "Character": screenplay.CHARACTER,
        "Dialogue": screenplay.DIALOGUE,
        "Parenthetical": screenplay.PAREN,
        "Scene Heading": screenplay.SCENE,
        "Shot": screenplay.SHOT,
        "Transition": screenplay.TRANSITION,
    }

    # the 5 MB limit is arbitrary, we just want to avoid getting a
    # MemoryError exception for /dev/zero etc.
    data = util.loadFile(fileName, frame, 5000000)

    if data is None:
        return None

    if len(data) == 0:
        wx.MessageBox("File is empty.", "Error", wx.OK, frame)

        return None

    # only the parse itself can raise XMLSyntaxError, so keep the try
    # block limited to it.
    try:
        root = etree.XML(data)
    except etree.XMLSyntaxError as e:
        wx.MessageBox("Error parsing file: %s" % e, "Error", wx.OK, frame)
        return None

    lines = []

    # split eleText on newlines and append one Line per piece; all but
    # the last piece get a forced linebreak, the last one ends the element.
    def addElem(eleType, eleText):
        lns = eleText.split("\n")

        # if elem ends in a newline, last line is empty and useless;
        # get rid of it
        if not lns[-1] and (len(lns) > 1):
            lns = lns[:-1]

        for s in lns[:-1]:
            lines.append(
                screenplay.Line(screenplay.LB_FORCED, eleType,
                                util.cleanInput(s)))

        lines.append(
            screenplay.Line(screenplay.LB_LAST, eleType,
                            util.cleanInput(lns[-1])))

    for para in root.xpath("Content//Paragraph"):
        addedNote = False
        et = para.get("Type")

        # Check for script notes
        s = u""
        for notes in para.xpath("ScriptNote/Paragraph/Text"):
            if notes.text:
                s += notes.text

            # FD has AdornmentStyle set to "0" on notes with newline.
            if notes.get("AdornmentStyle") == "0":
                s += "\n"

        if s:
            addElem(screenplay.NOTE, s)
            addedNote = True

        # "General" has embedded Dual Dialogue paragraphs inside it;
        # nothing to do for the General element itself.
        #
        # If no type is defined (like inside scriptnote), skip.
        if (et == "General") or (et is None):
            continue

        # text.text is None for paragraphs with no text, so only join
        # the pieces that actually carry text.
        s = u"".join(text.text for text in para.xpath("Text") if text.text)

        # don't remove paragraphs with no text, unless that paragraph
        # contained a scriptnote
        if s or not addedNote:
            lt = elemMap.get(et, screenplay.ACTION)
            addElem(lt, s)

    if len(lines) == 0:
        wx.MessageBox("The file contains no importable lines", "Error",
                      wx.OK, frame)
        return None

    return lines
Example #21
0
def importCeltx(fileName, frame):
    """Import a Celtx file as screenplay lines.

    Celtx files are zipfiles, and the script content is within a file
    called "script-xxx.html", where xxx can be random.

    Args:
        fileName: name of the file to import; handed to util.loadFile.
        frame: window handed to util.loadFile and used as the parent of
            the error message boxes shown by this function.

    Returns:
        A list of screenplay.Line objects, or None when util.loadFile
        returns None, the file is empty, it is not a readable zip
        archive, it holds no "script*" member, that member is empty or
        unparsable, or no lines could be extracted.
    """
    # the 5 MB limit is arbitrary, we just want to avoid getting a
    # MemoryError exception for /dev/zero etc.
    data = util.loadFile(fileName, frame, 5000000)

    if data is None:
        return None

    if len(data) == 0:
        wx.MessageBox("File is empty.", "Error", wx.OK, frame)

        return None

    # zipfile needs a binary stream, so wrap the data in BytesIO (a text
    # StringIO can never be read as an archive). Building the buffer
    # inside the try also turns a non-bytes payload into the same
    # user-visible error instead of a traceback.
    try:
        z = zipfile.ZipFile(io.BytesIO(data))
    except Exception:
        wx.MessageBox("File is not a valid Celtx script file.", "Error", wx.OK,
                      frame)
        return None

    # make sure the archive is closed on every path, including the early
    # return when no script member exists.
    try:
        scripts = [s for s in z.namelist() if s.startswith("script")]

        if len(scripts) == 0:
            wx.MessageBox("Unable to find script in this Celtx file.",
                          "Error", wx.OK, frame)
            return None

        content = z.read(scripts[0])
    finally:
        z.close()

    if not content:
        wx.MessageBox("Script seems to be empty.", "Error", wx.OK, frame)
        return None

    elemMap = {
        "action": screenplay.ACTION,
        "character": screenplay.CHARACTER,
        "dialog": screenplay.DIALOGUE,
        "parenthetical": screenplay.PAREN,
        "sceneheading": screenplay.SCENE,
        "shot": screenplay.SHOT,
        "transition": screenplay.TRANSITION,
        "act": screenplay.ACTBREAK,
    }

    try:
        parser = etree.HTMLParser()
        root = etree.XML(content, parser)
    except etree.XMLSyntaxError as e:
        wx.MessageBox("Error parsing file: %s" % e, "Error", wx.OK, frame)
        return None

    lines = []

    # append one Line per item of lns; all but the last get a forced
    # linebreak, the last one ends the element.
    def addElem(eleType, lns):
        # if elem ends in a newline, last line is empty and useless;
        # get rid of it
        if not lns[-1] and (len(lns) > 1):
            lns = lns[:-1]

        for s in lns[:-1]:
            lines.append(
                screenplay.Line(screenplay.LB_FORCED, eleType,
                                util.cleanInput(s)))

        lines.append(
            screenplay.Line(screenplay.LB_LAST, eleType,
                            util.cleanInput(lns[-1])))

    for para in root.xpath("/html/body/p"):
        # every text fragment of the paragraph becomes its own line,
        # with embedded newlines flattened to spaces.
        items = [str(line.replace("\n", " ")) for line in para.itertext()]

        lt = elemMap.get(para.get("class"), screenplay.ACTION)

        if items:
            addElem(lt, items)

    if len(lines) == 0:
        wx.MessageBox("The file contains no importable lines", "Error", wx.OK,
                      frame)
        return None

    return lines
Example #22
0
def importAstx(fileName, frame):
    """Import an Adobe Story (.astx) XML file as screenplay lines.

    The text lines live under
    AdobeStory/document/stream/section/scene/paragraph; each paragraph
    holds one or more textRun/break children that are joined, and its
    "element" attribute selects the element style.

    Returns a list of screenplay.Line objects, or None when
    util.loadFile returns None, the file is empty, the XML cannot be
    parsed, or nothing was extracted.
    """
    data = util.loadFile(fileName, frame, 5000000)

    if data is None:
        return None

    if not len(data):
        wx.MessageBox("File is empty.", "Error", wx.OK, frame)
        return None

    try:
        root = etree.XML(data)
    except etree.XMLSyntaxError as e:
        wx.MessageBox("Error parsing file: %s" % e, "Error", wx.OK, frame)
        return None

    styleToType = {
        "Action": screenplay.ACTION,
        "Character": screenplay.CHARACTER,
        "Dialog": screenplay.DIALOGUE,
        "Parenthetical": screenplay.PAREN,
        "SceneHeading": screenplay.SCENE,
        "Shot": screenplay.SHOT,
        "Transition": screenplay.TRANSITION,
    }

    lines = []

    def addElem(eleType, items):
        # a trailing empty piece (element ended in a break) is useless,
        # so drop it unless it is the only piece
        if (len(items) > 1) and not items[-1]:
            items = items[:-1]

        # every piece but the last keeps a forced linebreak
        lines.extend(
            screenplay.Line(screenplay.LB_FORCED, eleType,
                            util.cleanInput(piece))
            for piece in items[:-1])

        # the last piece closes the element
        lines.append(
            screenplay.Line(screenplay.LB_LAST, eleType,
                            util.cleanInput(items[-1])))

    paragraphs = root.xpath(
        "/AdobeStory/document/stream/section/scene/paragraph")

    for para in paragraphs:
        pieces = []
        current = ""

        for child in para:
            if child.tag == "break":
                # a break finishes the piece collected so far
                pieces.append(current.rstrip())
                current = ""
            elif child.tag == "textRun" and child.text:
                current += child.text

        pieces.append(current.rstrip())

        addElem(styleToType.get(para.get("element"), screenplay.ACTION),
                pieces)

    if not lines:
        wx.MessageBox("File has no content.", "Error", wx.OK, frame)
        return None

    return lines
Example #23
0
def main(argv):
    """Extract WORK and PART relation templates from a text file.

    Each text returned by loadFile(argv[0]) is run through the spaCy
    pipeline, an entity graph is built from it, the maximal cliques of
    that graph are searched for edges that fill work/part templates, and
    every filled template is appended as one JSON line to the output
    file.

    Args:
        argv: argument list whose first item is the input file name. The
            output file name is that name with its last four characters
            replaced by '.json'.
    """
    if len(sys.argv) < 2:
        print("pass filename")
        sys.exit(2)

    fileName = argv[0]
    print("loading " + fileName)

    texts = loadFile(fileName)
    outPath = str(fileName)[:-4] + '.json'

    nlp = spacy.load("en_core_web_sm")
    for idx, doc in enumerate(nlp.pipe(texts, disable=["tagger", "parser"])):
        print("Named Entities:", [(ent.text, ent.label_) for ent in doc.ents])

        # Represent entity graph as dictionary: <Entity name, Node>
        nodes = buildEntityGraph(doc, texts[idx])

        # verifying graph
        print("Graph:")
        printGraph(nodes)

        # Find maximal cliques and clique weights
        print("BRON-KERBOSCH")
        sys.setrecursionlimit(2000)
        cliques = bron_kerbosch(list(nodes.values()))
        print("cliques:", cliques)

        # if the clique contains certain types of relations, then we fill
        # them into the complex relation / template
        workTemplates = []
        partTemplates = []

        for clique in cliques:
            for node in clique:
                for edge in node.weightedEdges:
                    # only edges that stay inside the clique count
                    if edge.dst in clique:
                        tryAddWorkTemplate(edge, workTemplates)
                        tryAddPartTemplate(edge, partTemplates)

        # verifying template filling
        for work in workTemplates:
            print('Work:',
                  work.person,
                  work.org,
                  work.title,
                  work.location,
                  sep=', ')

        for part in partTemplates:
            print(part.part, part.whole, sep=' part of ')

        # collect the templates as Output records; missing slots are
        # written as empty strings
        out = []
        for template in workTemplates:
            arguments = {
                '1': template.person or "",
                '2': template.org or "",
                '3': template.title or "",
                '4': template.location or "",
            }
            extraction = Extraction('WORK', [token.text for token in doc],
                                    arguments)
            out.append(Output(fileName, extraction))

        for template in partTemplates:
            arguments = {
                '1': template.part or "",
                '2': template.whole or "",
            }
            extraction = Extraction('PART', [token.text for token in doc],
                                    arguments)
            out.append(Output(fileName, extraction))

        # Write new relations to data file. Open it once per document
        # rather than once per record; skip the open entirely when there
        # is nothing to write so no empty file is created.
        jsons.suppress_warnings()
        if out:
            with open(outPath, 'a') as the_file:
                for output in out:
                    the_file.write(json.dumps(jsons.dump(output)) + '\n')
Example #24
0
def importFadein(fileName, frame):
    """Import a Fade In (.fadein) file as screenplay lines.

    Fadein file is a zipped document.xml file.
    the .xml is in open screenplay format:
    http://sourceforge.net/projects/openscrfmt/files/latest/download

    Args:
        fileName: name of the file to import; handed to util.loadFile.
        frame: window handed to util.loadFile and used as the parent of
            the error message boxes shown by this function.

    Returns:
        A list of screenplay.Line objects, or None when util.loadFile
        returns None, the file is empty, document.xml cannot be read
        from the archive, it is empty or unparsable, or no lines could
        be extracted.
    """
    # the 5 MB limit is arbitrary, we just want to avoid getting a
    # MemoryError exception for /dev/zero etc.
    data = util.loadFile(fileName, frame, 5000000)

    if data is None:
        return None

    if len(data) == 0:
        wx.MessageBox("File is empty.", "Error", wx.OK, frame)

        return None

    # zipfile needs a binary stream, hence BytesIO rather than StringIO.
    # The with-statement closes the archive even when reading fails.
    try:
        with zipfile.ZipFile(io.BytesIO(data)) as z:
            content = z.read("document.xml")
    except Exception:
        wx.MessageBox("File is not a valid .fadein file.", "Error", wx.OK,
                      frame)
        return None

    if not content:
        wx.MessageBox("Script seems to be empty.", "Error", wx.OK, frame)
        return None

    elemMap = {
        "Action": screenplay.ACTION,
        "Character": screenplay.CHARACTER,
        "Dialogue": screenplay.DIALOGUE,
        "Parenthetical": screenplay.PAREN,
        "Scene Heading": screenplay.SCENE,
        "Shot": screenplay.SHOT,
        "Transition": screenplay.TRANSITION,
    }

    try:
        root = etree.XML(content)
    except etree.XMLSyntaxError as e:
        wx.MessageBox("Error parsing file: %s" % e, "Error", wx.OK, frame)
        return None

    lines = []

    # append one Line per item of lns; all but the last get a forced
    # linebreak, the last one ends the element.
    def addElem(eleType, lns):
        # if elem ends in a newline, last line is empty and useless;
        # get rid of it
        if not lns[-1] and (len(lns) > 1):
            lns = lns[:-1]

        for s in lns[:-1]:
            lines.append(
                screenplay.Line(screenplay.LB_FORCED, eleType,
                                util.cleanInput(s)))

        lines.append(
            screenplay.Line(screenplay.LB_LAST, eleType,
                            util.cleanInput(lns[-1])))

    # removes html formatting from s, and returns list of lines.
    # if s is None, return a list with single empty string.
    re_rem = [r'<font[^>]*>', r'<size[^>]*>', r'<bgcolor[^>]*>']
    rem = [
        "<b>", "</b>", "<i>", "</i>", "<u>", "</u>", "</font>", "</size>",
        "</bgcolor>"
    ]

    def sanitizeStr(s):
        if not s:
            return [""]

        for pattern in re_rem:
            s = re.sub(pattern, "", s)
        for tag in rem:
            s = s.replace(tag, "")

        if s:
            return s.split("<br>")

        return [""]

    for para in root.xpath("paragraphs/para"):
        # check for notes/synopsis, import as Note.
        for attr in ("note", "synopsis"):
            value = para.get(attr)
            if value:
                addElem(screenplay.NOTE, sanitizeStr(value))

        # look for the <style> and <text> tags. Bail if no <text> found.
        styl = para.xpath("style")
        txt = para.xpath("text")
        if not txt:
            continue

        if styl:
            lt = elemMap.get(styl[0].get("basestylename"),
                             screenplay.ACTION)
        else:
            lt = screenplay.ACTION

        items = sanitizeStr(txt[0].text)

        # wrap parentheticals that lack their parentheses. startswith is
        # used instead of items[0][0] because the first item is an empty
        # string when the paragraph has no text, and indexing it raised
        # IndexError.
        if (lt == screenplay.PAREN) and not items[0].startswith("("):
            items[0] = "(" + items[0]
            items[-1] = items[-1] + ")"

        addElem(lt, items)

    if len(lines) == 0:
        wx.MessageBox("The file contains no importable lines", "Error", wx.OK,
                      frame)
        return None

    return lines
Example #25
0
 def __init__(self, filestr):
     """Store *filestr* and load the file of that name from "data/"."""
     self.filestr = filestr
     # the file is looked up relative to the "data/" directory
     dataPath = "data/" + filestr
     self.data = util.loadFile(dataPath)
from datetime import datetime

import util, datapreprocess
import build.pysim as pysim

'''
Used to compare how a trained LSTM model handles different input variations.
'''

# Seed the generator so every run draws the same random values
np.random.seed(1000)

# Number of random arm positions / spline trajectories to use
sequences = 128

# Name under which the generated positions and splines are cached
_positionsName = 'randomPositionsSplines_Leg_%d_New' % sequences

if not util.fileExists(_positionsName, '', util.comparisonDir):
    # Nothing cached yet: draw the positions first, then the splines
    # (the draw order determines the values for a given seed).
    allArmPositions = [
        [
            np.random.uniform(-0.05, 0.05),
            np.random.uniform(-0.2, -0.05),
            np.random.uniform(-0.05, 0.05),
        ]
        for i in xrange(sequences)
    ]
    # 24 spline entries per sequence; the sign of the third value
    # alternates with the entry index.
    allSplines = [
        [
            [
                np.random.uniform(-0.03, 0.03),
                -0.1 + np.random.uniform(-0.02, 0.02),
                (-1)**i * np.random.uniform(0, 0.05),
                0, 1, 0, 0,
            ]
            for i in xrange(1, 25)
        ]
        for j in xrange(sequences)
    ]
    util.saveData(_positionsName, [allArmPositions, allSplines], '', util.comparisonDir)
    print('Saved arm positions for later reference')
else:
    allArmPositions, allSplines = util.loadFile(_positionsName, '', util.comparisonDir)
    print('Loaded arm positions and spline trajectories')


def _rotationAngles():
    # Returns a fresh list per call so the three axes never share one
    # list object: zero, then -1/-5/-10/-15 degrees, then +1/+5/+10/+15
    # degrees, all converted to radians.
    degrees = (1, 5, 10, 15)
    negative = [-np.radians(d) for d in degrees]
    positive = [np.radians(d) for d in degrees]
    return [0.0] + negative + positive


# Velocity values to compare
velocities = [1.5, 1.75, 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.75, 3.0]
rotateX = _rotationAngles()
# y axis variation (left and right, negative is right (wrt human))
rotateY = _rotationAngles()
# z axis variation (up and down, negative is down (wrt human))
rotateZ = _rotationAngles()
Example #27
0
def importTextFile(fileName, frame):
    """Import a plain-text screenplay, typing lines by their indentation.

    Statistics are gathered per indentation level, each level is
    assigned a line type by setType, the guesses are shown in an
    ImportDlg, and the lines are then converted according to the types
    stored in the indent table.

    Args:
        fileName: name of the file to import; handed to util.loadFile.
        frame: window handed to util.loadFile and used as the parent of
            the message boxes and the import dialog.

    Returns:
        A list of screenplay.Line objects (never empty), or None when
        util.loadFile returns None, the file is empty or holds only
        empty lines, or the import dialog is not confirmed.
    """

    # the 1 MB limit is arbitrary, we just want to avoid getting a
    # MemoryError exception for /dev/zero etc.
    data = util.loadFile(fileName, frame, 1000000)

    if data is None:
        return None

    if len(data) == 0:
        wx.MessageBox("File is empty.", "Error", wx.OK, frame)

        return None

    data = util.fixNL(data)
    lines = data.split("\n")

    tabWidth = 4

    # key = indent level, value = Indent
    indDict = {}

    for i, rawLine in enumerate(lines):
        s = util.toInputStr(rawLine.rstrip().expandtabs(tabWidth))

        # don't count empty lines towards indentation statistics
        if s.strip() == "":
            lines[i] = ""

            continue

        cnt = util.countInitial(s, " ")

        ind = indDict.get(cnt)
        if not ind:
            ind = Indent(cnt)
            indDict[cnt] = ind

        tmp = s.upper()

        if util.multiFind(tmp, ["EXT.", "INT."]):
            ind.sceneStart += 1

        if util.multiFind(tmp, ["CUT TO:", "DISSOLVE TO:"]):
            ind.trans += 1

        if re.match(r"^ +\(.*\)$", tmp):
            ind.paren += 1

        ind.lines.append(s.lstrip())
        lines[i] = s

    if len(indDict) == 0:
        wx.MessageBox("File contains only empty lines.", "Error", wx.OK, frame)

        return None

    # scene/action indent
    setType(SCENE_ACTION, indDict, lambda v: v.sceneStart)

    # indent with most lines is dialogue in non-pure-action scripts
    setType(screenplay.DIALOGUE, indDict, lambda v: len(v.lines))

    # remaining indent with lines is character most likely
    setType(screenplay.CHARACTER, indDict, lambda v: len(v.lines))

    # transitions
    setType(screenplay.TRANSITION, indDict, lambda v: v.trans)

    # parentheticals
    setType(screenplay.PAREN, indDict, lambda v: v.paren)

    # some text files have this type of parens:
    #
    #        JOE
    #      (smiling and
    #       hopping along)
    #
    # this handles them.
    parenIndent = findIndent(indDict, lambda v: v.lt == screenplay.PAREN)
    if parenIndent != -1:
        paren2Indent = findIndent(indDict,
            lambda v, var: (v.lt == -1) and (v.indent == var),
            parenIndent + 1)

        if paren2Indent != -1:
            indDict[paren2Indent].lt = screenplay.PAREN

    # set line type to ACTION for any indents not recognized.
    # values() (unlike itervalues()) exists on both Python 2 and 3.
    for v in indDict.values():
        if v.lt == -1:
            v.lt = screenplay.ACTION

    # hand the dialog a real list; on Python 3 values() is only a view.
    dlg = ImportDlg(frame, list(indDict.values()))

    if dlg.ShowModal() != wx.ID_OK:
        dlg.Destroy()

        return None

    dlg.Destroy()

    ret = []

    for text in lines:
        cnt = util.countInitial(text, " ")
        s = text.lstrip()
        sUp = s.upper()

        if s:
            lt = indDict[cnt].lt

            if lt == IGNORE:
                continue

            if lt == SCENE_ACTION:
                if s.startswith("EXT.") or s.startswith("INT."):
                    lt = screenplay.SCENE
                else:
                    lt = screenplay.ACTION

            # a change of type ends the previous element
            if ret and (ret[-1].lt != lt):
                ret[-1].lb = screenplay.LB_LAST

            if lt == screenplay.CHARACTER:
                # drop the trailing "(CONT'D)" marker (8 characters)
                if sUp.endswith("(CONT'D)"):
                    s = sUp[:-8].rstrip()

            elif lt == screenplay.PAREN:
                if s == "(continuing)":
                    s = ""

            if s:
                line = screenplay.Line(screenplay.LB_SPACE, lt, s)
                ret.append(line)

        elif ret:
            # an empty line ends the previous element
            ret[-1].lb = screenplay.LB_LAST

    if len(ret) == 0:
        ret.append(screenplay.Line(screenplay.LB_LAST, screenplay.ACTION))

    # make sure the last line ends an element
    ret[-1].lb = screenplay.LB_LAST

    return ret
Example #28
0
def importFountain(fileName, frame):
    """Import a Fountain-format text file as screenplay lines.

    The user is asked (via a misc.CheckBoxDlg) whether title lines are
    imported as action lines, whether formatting markup is removed and
    whether sections/synopses are imported as notes. A first pass then
    assigns a line type to every input line, and a second pass removes
    unneeded empty lines and fixes the linebreaks.

    Args:
        fileName: name of the file to import; handed to util.loadFile.
        frame: window handed to util.loadFile and used as the parent of
            the message box and the options dialog; also used to look up
            the configured width of ACTION lines for centered text.

    Returns:
        A list of screenplay.Line objects, or None when util.loadFile
        returns None, the file is empty, or the options dialog is not
        confirmed.
    """
    # regular expressions for fountain markdown.
    # https://github.com/vilcans/screenplain/blob/master/screenplain/richstring.py
    ire = re.compile(
            # one star
            r'\*'
            # anything but a space, then text
            r'([^\s].*?)'
            # finishing with one star
            r'\*'
            # must not be followed by star
            r'(?!\*)'
        )
    bre = re.compile(
            # two stars
            r'\*\*'
            # must not be followed by space
            r'(?=\S)'
            # inside text
            r'(.+?[*_]*)'
            # finishing with two stars
            r'(?<=\S)\*\*'
        )
    ure = re.compile(
            # underline
            r'_'
            # must not be followed by space
            r'(?=\S)'
            # inside text
            r'([^_]+)'
            # finishing with underline
            r'(?<=\S)_'
        )
    boneyard_re = re.compile('/\\*.*?\\*/', flags=re.DOTALL)

    # random magicstring used to escape literal star '\*'
    literalstar = "Aq7RR"

    # returns s with markdown formatting removed.
    def unmarkdown(s):
        s = s.replace("\\*", literalstar)
        for style in (bre, ire, ure):
            s = style.sub(r'\1', s)
        return s.replace(literalstar, "*")

    data = util.loadFile(fileName, frame, 1000000)

    if data is None:
        return None

    if len(data) == 0:
        wx.MessageBox("File is empty.", "Error", wx.OK, frame)
        return None

    inf = []
    inf.append(misc.CheckBoxItem("Import titles as action lines."))
    inf.append(misc.CheckBoxItem("Remove unsupported formatting markup."))
    inf.append(misc.CheckBoxItem("Import section/synopsis as notes."))

    dlg = misc.CheckBoxDlg(frame, "Fountain import options", inf,
        "Import options:", False)

    if dlg.ShowModal() != wx.ID_OK:
        dlg.Destroy()
        return None

    importTitles = inf[0].selected
    removeMarkdown = inf[1].selected
    importSectSyn = inf[2].selected

    # the dialog must be destroyed on the OK path as well, otherwise it
    # is leaked; the selections have already been read above.
    dlg.Destroy()

    # pre-process data - fix newlines, remove boneyard.
    data = data.decode("utf-8")
    data = util.fixNL(data)
    data = boneyard_re.sub('', data)
    prelines = data.split("\n")
    lines = []

    tabWidth = 4
    lns = []
    TWOSPACE = "  "
    skipone = False

    # First check if title lines are present: c counts the non-empty
    # lines at the very start of the file.
    c = 0
    while c < len(prelines):
        if prelines[c] != "":
            c = c+1
        else:
            break

    # prelines[0:c] are the first bunch of lines, that could be titles.
    # Our check for title is simple:
    #   - the line does not start with 'fade'
    #   - the first line has a single ':'

    if c > 0:
        l = util.toInputStr(prelines[0].expandtabs(tabWidth).lstrip().lower())
        if not l.startswith("fade") and l.count(":") == 1:
            # these are title lines. Now do what the user requested.
            if importTitles:
                # add TWOSPACE to all the title lines.
                for i in range(c):
                    prelines[i] += TWOSPACE
            else:
                # remove these lines, and the empty line that ends them
                prelines = prelines[c+1:]

    for l in prelines:
        if l != TWOSPACE:
            lines.append(util.toInputStr(l.expandtabs(tabWidth)))
        else:
            lines.append(TWOSPACE)

    def isPrevEmpty():
        if lns and lns[-1].text == "":
            return True
        return False

    def isPrevType(ltype):
        return (lns and lns[-1].lt == ltype)

    # looks ahead to check if there is a next line and it is empty
    def isNextEmpty(i):
        return  (i+1 < len(lines) and lines[i+1] == "")

    def getPrevType():
        if lns:
            return lns[-1].lt
        else:
            return screenplay.ACTION

    def isParen(s):
        return (s.startswith('(') and s.endswith(')'))

    def isScene(s):
        if s.endswith(TWOSPACE):
            return False
        if s.startswith(".") and not s.startswith(".."):
            return True
        tmp = s.upper()
        if (re.match(r'^(INT|EXT|EST)[ .]', tmp) or
            re.match(r'^(INT\.?/EXT\.?)[ .]', tmp) or
            re.match(r'^I/E[ .]', tmp)):
            return True
        return False

    def isTransition(s):
        return ((s.isupper() and s.endswith("TO:")) or
                (s.startswith(">") and not s.endswith("<")))

    def isCentered(s):
        return s.startswith(">") and s.endswith("<")

    def isPageBreak(s):
        return s.startswith('===') and s.lstrip('=') == ''

    def isNote(s):
        return s.startswith("[[") and s.endswith("]]")

    def isSection(s):
        return s.startswith("#")

    def isSynopsis(s):
        return s.startswith("=") and not s.startswith("==")

    # first pass - identify linetypes
    for i, s in enumerate(lines):
        if skipone:
            skipone = False
            continue

        sl = s.lstrip()
        # mark as ACTION by default.
        line = screenplay.Line(screenplay.LB_FORCED, screenplay.ACTION, s)

        # Start testing lines for element type. Go in order:
        # Scene Character, Paren, Dialog, Transition, Note.

        if s == "" or isCentered(s) or isPageBreak(s):
            # do nothing - import as action.
            pass

        elif s == TWOSPACE:
            line.lt = getPrevType()

        elif isScene(s):
            line.lt = screenplay.SCENE
            if sl.startswith('.'):
                line.text = sl[1:]
            else:
                line.text = sl

        elif isTransition(sl) and isPrevEmpty() and isNextEmpty(i):
            line.lt = screenplay.TRANSITION
            # the line was classified on its stripped form, so test the
            # stripped form for the forced-transition marker too;
            # otherwise an indented "> TEXT" keeps its '>'.
            if sl.startswith('>'):
                line.text = sl[1:].lstrip()

        elif s.isupper() and isPrevEmpty() and not isNextEmpty(i):
            line.lt = screenplay.CHARACTER
            if s.endswith(TWOSPACE):
                line.lt = screenplay.ACTION

        elif isParen(sl) and (isPrevType(screenplay.CHARACTER) or
                                isPrevType(screenplay.DIALOGUE)):
            line.lt = screenplay.PAREN

        elif (isPrevType(screenplay.CHARACTER) or
             isPrevType(screenplay.DIALOGUE) or
             isPrevType(screenplay.PAREN)):
            line.lt = screenplay.DIALOGUE

        elif isNote(sl):
            line.lt = screenplay.NOTE
            line.text = sl.strip('[]')

        elif isSection(s) or isSynopsis(s):
            if not importSectSyn:
                # drop the line, and the empty line following it if any
                if isNextEmpty(i):
                    skipone = True
                continue

            line.lt = screenplay.NOTE
            line.text = sl.lstrip('=#')

        if line.text == TWOSPACE:
            pass

        elif line.lt != screenplay.ACTION:
            line.text = line.text.lstrip()

        else:
            tmp = line.text.rstrip()
            # we don't support center align, so simply add required indent.
            if isCentered(tmp):
                tmp = tmp[1:-1].strip()
                width = frame.panel.ctrl.sp.cfg.getType(screenplay.ACTION).width
                if len(tmp) < width:
                    tmp = ' ' * ((width - len(tmp)) // 2) + tmp
            line.text = tmp

        if removeMarkdown:
            line.text = unmarkdown(line.text)
            # a trailing '^' on a character line is stripped as well
            if line.lt == screenplay.CHARACTER and line.text.endswith('^'):
                line.text = line.text[:-1]

        lns.append(line)

    ret = []

    # second pass helper functions.
    def isLastLBForced():
        return ret and ret[-1].lb == screenplay.LB_FORCED

    def makeLastLBLast():
        if ret:
            ret[-1].lb = screenplay.LB_LAST

    def isRetPrevType(t):
        return ret and ret[-1].lt == t

    # second pass - remove unneeded empty lines, and fix the linebreaks.
    for ln in lns:
        if ln.text == '':
            # an empty line after a forced linebreak only ends that
            # element; any other empty line is kept.
            if isLastLBForced():
                makeLastLBLast()
            else:
                ret.append(ln)

        elif not isRetPrevType(ln.lt):
            # a change of type ends the previous element
            makeLastLBLast()
            ret.append(ln)

        else:
            ret.append(ln)

    makeLastLBLast()
    return ret
Example #29
0
def importFDX(fileName, frame):
    """Import a Final Draft (.fdx) XML file as screenplay lines.

    Args:
        fileName: name of the file to import; handed to util.loadFile.
        frame: window handed to util.loadFile and used as the parent of
            the error message boxes shown by this function.

    Returns:
        A list of screenplay.Line objects, or None when util.loadFile
        returns None, the file is empty, the XML cannot be parsed, or no
        lines could be extracted.
    """
    elemMap = {
        "Action" : screenplay.ACTION,
        "Character" : screenplay.CHARACTER,
        "Dialogue" : screenplay.DIALOGUE,
        "Parenthetical" : screenplay.PAREN,
        "Scene Heading" : screenplay.SCENE,
        "Shot" : screenplay.SHOT,
        "Transition" : screenplay.TRANSITION,
    }

    # the 5 MB limit is arbitrary, we just want to avoid getting a
    # MemoryError exception for /dev/zero etc.
    data = util.loadFile(fileName, frame, 5000000)

    if data is None:
        return None

    if len(data) == 0:
        wx.MessageBox("File is empty.", "Error", wx.OK, frame)

        return None

    # only the parse itself can raise XMLSyntaxError, so keep the try
    # block limited to it.
    try:
        root = etree.XML(data)
    except etree.XMLSyntaxError as e:
        wx.MessageBox("Error parsing file: %s" %e, "Error", wx.OK, frame)
        return None

    lines = []

    # split eleText on newlines and append one Line per piece; all but
    # the last piece get a forced linebreak, the last one ends the element.
    def addElem(eleType, eleText):
        lns = eleText.split("\n")

        # if elem ends in a newline, last line is empty and useless;
        # get rid of it
        if not lns[-1] and (len(lns) > 1):
            lns = lns[:-1]

        for s in lns[:-1]:
            lines.append(screenplay.Line(
                    screenplay.LB_FORCED, eleType, util.cleanInput(s)))

        lines.append(screenplay.Line(
                screenplay.LB_LAST, eleType, util.cleanInput(lns[-1])))

    for para in root.xpath("Content//Paragraph"):
        addedNote = False
        et = para.get("Type")

        # Check for script notes
        s = u""
        for notes in para.xpath("ScriptNote/Paragraph/Text"):
            if notes.text:
                s += notes.text

            # FD has AdornmentStyle set to "0" on notes with newline.
            if notes.get("AdornmentStyle") == "0":
                s += "\n"

        if s:
            addElem(screenplay.NOTE, s)
            addedNote = True

        # "General" has embedded Dual Dialogue paragraphs inside it;
        # nothing to do for the General element itself.
        #
        # If no type is defined (like inside scriptnote), skip.
        if (et == "General") or (et is None):
            continue

        # text.text is None for paragraphs with no text, so only join
        # the pieces that actually carry text.
        s = u"".join(text.text for text in para.xpath("Text") if text.text)

        # don't remove paragraphs with no text, unless that paragraph
        # contained a scriptnote
        if s or not addedNote:
            lt = elemMap.get(et, screenplay.ACTION)
            addElem(lt, s)

    if len(lines) == 0:
        wx.MessageBox("The file contains no importable lines", "Error", wx.OK, frame)
        return None

    return lines
Example #30
0
#!/usr/bin/env python
# add words to ../dict_en.dat in the correct place

import sys

# String exceptions are not valid; raising one produces a TypeError
# instead of the intended message, so raise a real exception carrying
# the usage text.
if len(sys.argv) < 2:
    raise Exception("add_word.py word1 word2...")

# util lives in the parent directory, so extend the search path before
# importing it.
sys.path.insert(0, "..")

import util
util.init(False)

s = util.loadFile("../dict_en.dat", None)
if s is None:
    raise Exception("error")

# a dict is used as a set: its keys are the lowercased words, so
# duplicates collapse.
words = {}
lines = s.splitlines()

for it in lines:
    words[util.lower(it)] = None

for arg in sys.argv[1:]:
    words[util.lower(arg)] = None

# sorted() returns a sorted list of the keys on both Python 2 and 3
# (dict.keys() has no sort() method on Python 3).
words = sorted(words)

f = open("../dict_en.dat", "wb")
for w in words:
Example #31
0
# add words to ../dict_en.dat in the correct place

import sys

# at least one word must be given on the command line
if len(sys.argv) < 2:
    raise Exception("add_word.py word1 word2...")

# misc and util are imported from ../src, so extend the search path first
sys.path.insert(0, "../src")

import misc
import util

util.init(False)
misc.init(False)

s = util.loadFile("../dict_en.dat", None)
if s is None:
    raise Exception("error")

lines = s.splitlines()

# the dict serves as a set of lowercased words: first the words already
# in the dictionary file, then the ones given on the command line
words = dict.fromkeys(util.lower(entry) for entry in lines)
words.update(dict.fromkeys(util.lower(word) for word in sys.argv[1:]))

# replace the dict with the sorted list of its keys
words = sorted(words)