# Load the parser settings from the shared configuration
inDir = config.getProperty("dir.parser.in")
minWordCount = int(config.getProperty("parser.minWordCount"))

# Banner
print("Script Parser v.", parser_version)
print("================================")

# Echo the effective settings; a threshold below 150 triggers a warning
print("Input directory:", inDir)
print("Minimum words per character: ", minWordCount)
if minWordCount < 150:
	print(
		"[WARN] Minimum word count is important for clearing out the noise, caused by"
		" wrongly-recognized character names etc. - Avoid adjusting this parameter to a"
		" value below 150!"
	)
print()

mongoClient, mongoDb = mongohelper.getMongoClient()

# Every regular file in the input directory is a candidate script
candidateFiles = [name for name in listdir(inDir) if isfile(join(inDir, name))]

# Without arguments (or with only -forceUpdate) all candidates are processed;
# otherwise only the files whose movie ID was passed on the command line
requestedIds = sys.argv[1:]
if len(sys.argv) == 1 or requestedIds == ['-forceUpdate']:
	scriptList = candidateFiles
else:
	scriptList = [name for name in candidateFiles if parseMovieId(name) in requestedIds]

print('[  0%] Processing', len(scriptList), 'scripts')

# Run statistics, updated while the scripts are processed
countTotal = countError = countSkip = 0
# Iterate over the input files
for i in range(len(scriptList)):
	countTotal += 1
Beispiel #2
0
# -*- coding: UTF-8 -*-
import sys

# Include custom libs
sys.path.append( '../../include/python' )

import serverutils.config as config
import serverutils.mongohelper as mongohelper

import pymongo

# Defaults; any key=value command-line argument overrides or extends them
props = {'targetDbName': 'characterinsights_copy'}
for prop in sys.argv[1:]:
	# partition() never raises, so an argument without "=" is reported with a
	# clear message instead of an unpacking ValueError traceback
	k, sep, v = prop.partition("=")
	if not sep:
		sys.exit("Invalid argument \"" + prop + "\": expected key=value")
	props[k] = v

# Connect DBs: the source is obtained without overrides, the target is
# requested explicitly as a local database named by props['targetDbName']
mongoClientSource, mongoDbSource = mongohelper.getMongoClient()
mongoClientTarget, mongoDbTarget = mongohelper.getMongoClient(orMongoMode='local',orHost='localhost',orDbName=props['targetDbName'])

# Collections that are copied from the source into the target database
collections = ['inouttest','movies','personalitydimensionnormalizations','rawMovies','rawQuotes','results']

for collection in collections:
	print(collection)
	# Copy document by document; one dot per document acts as progress indicator
	for doc in mongoDbSource[collection].find():
		mongoDbTarget[collection].insert_one(doc)
		print('.',end='',flush=True)
	print("")

print("Done.")
Beispiel #3
0
#!/usr/bin/python3
dump_version = "0.10"

# Include custom libs
import sys
sys.path.append('../../include/python')

import serverutils.config as config
import serverutils.mongohelper as mongohelper

import pymongo

# NOTE(review): silent=True presumably suppresses the helper's own console
# output so that only the listing/dump is printed - confirm in mongohelper
mongoClient, mongoDb = mongohelper.getMongoClient(silent=True)

if len(sys.argv) < 2:
    # No movie ID given: print the usage and list all IDs in ascending order
    print("Usage: dumpMovie.py <movieId>")
    print("")
    print("Listing movies:")
    for movie in mongoDb.rawMovies.find().sort("_id", pymongo.ASCENDING):
        print(movie['_id'])

else:
    movieId = sys.argv[1]
    movieCursor = mongoDb.rawMovies.find({'_id': movieId})
    # Cursor.count() was deprecated in PyMongo 3.7 and removed in 4.0, so the
    # first match is fetched through the iterator protocol instead; this works
    # on every driver version and yields None when nothing matches
    movieData = next(movieCursor, None)
    if movieData is None:
        print("The movie with ID \"" + movieId +
              "\" could not be found. Calling this tool" +
              " without a movie ID will list all available movies.")
    else:
        print(movieData['normalizedMovieId'])
# Drop every trailing slash from the new hostname
newHostname = newHostname.rstrip("/")

# A hostname that starts with "http" is rejected when a slash is found
# after the first eight characters (at a position greater than zero)
startsWithScheme = newHostname.startswith('http')
if startsWithScheme and newHostname[8:].find("/") > 0:
    if not quiet:
        print("Invalid new hostname:", newHostname)
        print("The hostname must not contain any slashes")
    exit()

if not quiet:
    print("New host is", newHostname)

# Connect DBs
mongoClient, mongoDb = mongohelper.getMongoClient(silent=quiet)

# Walk over every document of the 'movies' collection and recompute its
# poster URL for the new hostname.
for movie in mongoDb['movies'].find():
    # Use the resolved name for the log output; fall back to the
    # script-derived name when the resolved one is empty
    movieName = movie['names']['resolved'] if len(
        movie['names']['resolved']) > 0 else movie['names']['scriptUnified']
    if not quiet:
        print("Processing", movieName)

    # Set to True below as soon as a field of this document is replaced
    dataChanged = False

    # NOTE(review): replaceUrl presumably swaps the host part of the URL for
    # newHostname - confirm against its definition
    oldMoviePosterUrl = movie['picture']['path']
    newMoviePosterUrl = replaceUrl(oldMoviePosterUrl, newHostname)

    # Only modify the document when the URL actually differs
    if oldMoviePosterUrl != newMoviePosterUrl:
        movie['picture']['path'] = newMoviePosterUrl
        dataChanged = True