Example no. 1
from itertools import groupby
from operator import itemgetter

import ReadFromFile


def get_data_list_preparative(sheet_data):
    # Seed the result lists with what is already in the statistics sheet.
    list_subproject, list_year_month, dict_statistics_data = ReadFromFile.read_statistics_data(sheet_data)
    dict_item_cost = {}
    table_used = ReadFromFile.read_preparative_data('预统计数据清单(正式账户).csv', '数据清单(正式账户).csv')
    # groupby only groups consecutive rows, so sort by sub-product name first.
    table_used.sort(key=itemgetter('子产品名称'))
    for name, items_groupby_name in groupby(table_used, key=itemgetter('子产品名称')):
        #print(' ', name)  # sub-product name
        item_cost = {}
        list_subproject.append(name)
        list_groupby_date = list(items_groupby_name)
        list_groupby_date.sort(key=itemgetter('结束使用时间'))
        for year_month, items_groupby_date in groupby(list_groupby_date, key=itemgetter('结束使用时间')):
            #print(year_month)  # year-month
            list_year_month.append(year_month)
            item_cost[year_month] = 0
            for item in items_groupby_date:
                item_cost[year_month] += float(item['现金账户支出(元)'])
        dict_item_cost[name] = item_cost
        #print(item_cost)  # monthly cost per sub-product
    #print(dict_item_cost)

    # Deduplicate and sort the collected names and months.
    list_year_month = sorted(set(list_year_month))
    list_subproject = sorted(set(list_subproject))
    #print(dict_item_cost)
    #print(list_year_month)
    #print(list_subproject)
    #ReadFromFile.WriteToFile.write_statistics_data(sheet_data, list_subproject, list_year_month, dict_item_cost)
    return list_subproject, list_year_month, dict_item_cost
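A self-contained toy run of the same sort-then-groupby-and-sum pattern (illustrative rows only; the real data comes from the CSV files above):

from itertools import groupby
from operator import itemgetter

rows = [
    {'子产品名称': 'A', '结束使用时间': '2019-01', '现金账户支出(元)': '1.5'},
    {'子产品名称': 'A', '结束使用时间': '2019-01', '现金账户支出(元)': '2.0'},
    {'子产品名称': 'B', '结束使用时间': '2019-02', '现金账户支出(元)': '4.0'},
]
rows.sort(key=itemgetter('子产品名称'))
costs = {}
for name, grp in groupby(rows, key=itemgetter('子产品名称')):
    grp = sorted(grp, key=itemgetter('结束使用时间'))
    costs[name] = {
        ym: sum(float(r['现金账户支出(元)']) for r in sub)
        for ym, sub in groupby(grp, key=itemgetter('结束使用时间'))
    }
print(costs)  # {'A': {'2019-01': 3.5}, 'B': {'2019-02': 4.0}}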
Example no. 2
def main():

    if '-h' in sys.argv:
        print_usage_message()
        exit()

    vectorFileName = fn.create_prof_vect_name(sys.argv)
    corrFileName = fn.create_correlations_name(sys.argv)

    if not os.path.exists(corrFileName):
        tokenVects = read.word_vects(vectorFileName)
        if tokenVects is None:
            print("Specified vector file not found.")
            print("To create vectors use 'createProfVectors.py'")
            exit()
        ratingVect = read.overall_rating_vect(vectorFileName)
        vocabVect = read.vocab_from_vect_file(vectorFileName)
        corrTups = stat.find_correlations(tokenVects, ratingVect, vocabVect)
        write.token_correlations(corrTups, corrFileName)
    else:
        corrTups = read.token_correlations(corrFileName)

    corrPlotFileName = None
    if '-save' in sys.argv:
        corrPlotFileName = fn.create_correlations_plot_name(sys.argv)

    # Plot correlations
    plot.tuple_pair_score_correlation(
        corrTups,
        title=plot.create_token_pair_score_correlation_name(sys.argv),
        saveFile=corrPlotFileName)
Example no. 3
def create_prof_vectors(tokenSchema, argv, profDicts=None, profTokenDict=None):
    """ Create token count vectors for the aggrigate reviews of each 
       professor.
   """

    if profDicts is None:
        profDicts = read.prof_dicts()

    if profTokenDict is None:
        ptdName = fn.create_prof_token_dict_name(argv)
        profTokenDict = read.prof_token_dicts(ptdName)

    schemaDict = value_idx_dict(tokenSchema)

    profVects = []
    pidsNotIncluded = []
    for prof in profDicts:
        newVect = create_prof_vector(
            prof, count.combine_rev_counters(profTokenDict[prof['pid']]),
            schemaDict)
        if newVect['token_vect'] is None:
            pidsNotIncluded.append(newVect['pid'])
        else:
            profVects.append(newVect)

    pidsNotIncluded.sort()

    return profVects, pidsNotIncluded
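The value_idx_dict helper is not shown in this listing; a minimal sketch consistent with how it is used here (behavior inferred from the call site, so treat it as an assumption):

def value_idx_dict(values):
    # Map each schema value to its position, e.g. ['a', 'b'] -> {'a': 0, 'b': 1}.
    return {v: i for i, v in enumerate(values)}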
Example no. 4
def non_single_small_idxs(pidVect):
    """Indices of pids in neither the single-review nor the short-review lists."""
    singlePids = set(read.pids_file(fn.PidsSingleRevFile))
    smallPids = set(read.pids_file(fn.PidsSmallRevLenFile))
    nonSingleSmallIdxs = [
        idx for idx, pid in enumerate(pidVect)
        if pid not in singlePids and pid not in smallPids
    ]
    return np.array(nonSingleSmallIdxs)
Example no. 5
def create_rev_vectors(tokenSchema, argv, profDicts=None, profTokenDict=None):
    """ Create one token count vector per individual review. """

    if profDicts is None:
        profDicts = read.prof_dicts()

    if profTokenDict is None:
        ptdName = fn.create_prof_token_dict_name(argv)
        profTokenDict = read.prof_token_dicts(ptdName)

    schemaDict = value_idx_dict(tokenSchema)

    revVects = []
    for prof in profDicts:
        for rev in prof['reviews']:
            revVects.append(create_rev_vector(rev, schemaDict))

    return revVects
Example no. 6
def main():

    if '-h' in sys.argv:
        print_usage_message()
        exit()

    # Load the professor dictionaries (reviews and ratings)
    profDicts = read.prof_dicts()

    singlePids = []
    smallLenPids = []

    # Flag profs with a single review or a short aggregate review
    for prof in profDicts:
        if len(prof['reviews']) == 1:
            singlePids.append(prof['pid'])
        total = 0
        for rev in prof['reviews']:
            total += len(rev['text'])
        if total <= MaxWordCount:
            smallLenPids.append(prof['pid'])

    print("Num singlePids:", len(singlePids))
    print("Num small pids:", len(smallLenPids))

    singlePids.sort()
    smallLenPids.sort()

    write.pids_file(singlePids, fn.PidsSingleRevFile)
    write.pids_file(smallLenPids, fn.PidsSmallRevLenFile)
Example no. 7
def speed_of_sound(temp, press, substance=None, formula=None):
    """
    Calculates the speed of sound for a pure substance

    :param temp: The substance temperature (K)
    :type temp: float
    :param press: The substance pressure (Pa)
    :type press: float
    :param substance: The substance name
    :type substance: str
    :param formula: The substance formula
    :type formula: str
    :return: Speed of Sound, vs (m/s)
    :rtype: float
    """

    Cp = cp(temp, substance=substance, formula=formula)
    Cv = cv(temp, press, substance=substance, formula=formula)
    try:
        MW, Tc, Pc, Ttrip, Ptrip, Acentric = read.get_phase_change_data(
            name=substance, formula=formula)
    except Exception:
        t = '{:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now())
        logging.error(
            '{0} Speed of Sound: Error loading phase change data.'.format(t))
        raise ValueError

    dPdV = pr.dPdV(temp, press, Tc, Pc, Acentric)
    vol = pr.volume(temp, press, Tc, Pc, Acentric)
    return np.sqrt(-1 / (MW * 0.001) * Cp / Cv * dPdV) * vol
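For reference, the return line restates the standard thermodynamic relation (the MW * 0.001 factor converts g/mol to kg/mol):

    c = V \sqrt{ -\frac{1}{M} \, \frac{C_p}{C_v} \left( \frac{\partial P}{\partial V} \right)_T }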
Example no. 8
def main():

    if '-h' in sys.argv:
        print_usage_message()
        exit()

    stmr = None
    stopwords = None
    if '-ss' in sys.argv:
        stmr = LancasterStemmer()
        stopwords = read.stopwords(stmr)

    countNames = fn.create_token_count_names(sys.argv)
    rawTokenCountName = countNames[0]
    revTokenCountName = countNames[1]
    profTokenCountName = countNames[2]

    rawTokens = read.token_count(rawTokenCountName, True)
    revTokens = read.token_count(revTokenCountName, True)
    profTokens = read.token_count(profTokenCountName, True)

    if rawTokens is None or revTokens is None or profTokens is None:
        profTokenDict = grab_prof_token_dict(stopwords, stmr)

        if rawTokens is None:
            rawTokens = grab_token_count(profTokenDict, count.num_tokens,
                                         rawTokenCountName)
        if revTokens is None:
            revTokens = grab_token_count(profTokenDict,
                                         count.num_reviews_with_token,
                                         revTokenCountName)

        if profTokens is None:
            profTokens = grab_token_count(profTokenDict,
                                          count.num_profs_with_token,
                                          profTokenCountName)

    plotName = create_plot_name()
    plotFileName = None
    if '-save' in sys.argv:
        plotFileName = fn.create_count_plot_name(sys.argv)

    plot.token_counts(rawTokens, revTokens, profTokens, plotFileName, plotName)
Example no. 9
def process_token_vectors(vects, argv):
    if '-tf' in argv:
        vects = np.apply_along_axis(to_tf_vect, 1, vects)
    elif '-tfidf' in argv:
        vocab = read.vocab_from_vect_file(fn.create_prof_vect_name(argv))
        idfVect = create_idf_vect(vocab, vects.shape[0], argv)
        print(idfVect.shape, vects.shape)
        vects = np.apply_along_axis(lambda x: to_tf_idf_vect(x, idfVect), 1,
                                    vects)
    return vects
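The to_tf_vect and to_tf_idf_vect helpers are not included in this listing; a plausible sketch consistent with how they are applied row-by-row above (names match the calls, bodies are assumptions):

import numpy as np

def to_tf_vect(countVect):
    # Normalize raw token counts to term frequencies.
    total = countVect.sum()
    return countVect / total if total > 0 else countVect

def to_tf_idf_vect(countVect, idfVect):
    # Term frequency scaled by inverse document frequency.
    return to_tf_vect(countVect) * idfVect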
Example no. 10
def specific_gravity_c7plus(n, fractions):
    # Molecular weight and specific gravity for each single carbon number.
    M, gamma = list(), list()
    for i in range(len(n)):
        M.append(read.get_phase_change_data(scn=n[i])[0])
        gamma.append(read.get_phase_change_data(scn=n[i])[4])

    # Total mass of the C7+ fraction (indices 6+ correspond to SCN >= 7).
    fraction7 = 0
    for i in range(6, len(n)):
        fraction7 += fractions[i] * M[i]

    # Mass-weighted harmonic mean of the component specific gravities.
    density = 0
    for i in range(6, len(n)):
        density += fractions[i] * M[i] / (fraction7 * gamma[i])

    return 1 / density
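The two loops implement the standard mass-weighted mixing rule for the specific gravity of the C7+ fraction, restated for reference:

    \gamma_{C_{7+}} = \frac{\sum_{i \ge 7} z_i M_i}{\sum_{i \ge 7} z_i M_i / \gamma_i}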
Example no. 11
def MW_c7plus(n, fractions):
    """Average molecular weight of the C7+ fraction:
    MW_C7+ = sum(z_i * M_i for SCN >= 7) / z_C7+."""
    z_c7p = z_c7plus(n, fractions)
    # Walk to the first SCN >= 8, then step back one to land on C7.
    index7 = 0
    while n[index7] < 8:
        index7 += 1
    index7 -= 1

    MW_c7p = 0
    for i in range(index7, len(n)):
        MW_c7p += read.get_phase_change_data(
            scn=n[i])[0] * fractions[i] / z_c7p
    return MW_c7p
Example no. 12
def token_schema_from_count(argv):
    countsFileName = fn.create_token_count_names(argv)
    countsFileName = countsFileName[1]  # Num revs token appears in
    tokenCounts = read.token_count(countsFileName)
    if tokenCounts is None:
        print("Token count file not found.")
        print("Create token count file using 'countTokens.py'")
        exit()

    minCount = int(argv[argv.index('-minCount') + 1])
    tokenSchema = [t for t, c in tokenCounts.items() if c >= minCount]
    tokenSchema.sort()
    return tokenSchema
Example no. 13
def create_idf_vect(vocab, numProfs, argv):
    """ vocab is expected to be a python list """

    countFileName = fn.create_token_count_names(argv)
    countFileName = countFileName[2]  # num profs the token appears in

    tokCounts = read.token_count(countFileName)

    countVect = np.zeros(len(vocab), dtype=float)

    for idx, word in enumerate(vocab):
        countVect[idx] = tokCounts[word]

    return np.log(numProfs / countVect)
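Note that a vocabulary word missing from the count file raises a KeyError, and a zero count would divide by zero. A hedged variant with add-one smoothing (a common alternative, not the author's code):

import numpy as np

def create_idf_vect_smoothed(vocab, numProfs, tokCounts):
    # Add-one smoothing keeps the log argument finite for unseen words.
    countVect = np.array([tokCounts.get(w, 0) for w in vocab], dtype=float)
    return np.log(numProfs / (1.0 + countVect))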
Example no. 14
    def __init__(self, path):
        self.path = path

        # Utility that allows reading of the file contents
        self.util = rff.ReadFile()

        # Run the utility on the path given
        self.util.Run(self.path)

        # Separates each attribute type and its examples into its own list
        self.ParseData()
        self.ExampleToDataType()

        self.transformed_examples = copy.deepcopy(self.examples)
        self.transformed_attributes = self.attributes.copy()

        # Takes the attributes of each example and transforms them into numbers
        self.TransformAttributes()
        self.TransformExamples(self.transformed_examples)
Example no. 15
def cv(temp, press, substance=None, formula=None):
    """
    Calculates the constant-volume heat capacity of a substance.
    Cv = Cp + T * dVdP * dPdT**2

    :param substance: The substance
    :type substance: str
    :param formula: The substance formula
    :type formula: str
    :param temp: The substance temperature (K)
    :type temp: float
    :param press: The substance pressure (Pa)
    :type press: float
    :return: cv (J/mol-K)
    :rtype: float
    """
    if substance is None and formula is None:
        t = '{:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now())
        logging.error(
            '{0} Constant-Volume Heat Capacity: No name or formula input.'.format(
                t))
        raise ValueError

    try:
        MW, Tc, Pc, Ttrip, Ptrip, Acentric = read.get_phase_change_data(
            name=substance, formula=formula)
    except Exception:
        t = '{:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now())
        logging.error(
            '{0} Constant-Volume Heat Capacity: Error loading phase change data.'.
            format(t))
        raise ValueError

    Cp = cp(temp, substance=substance, formula=formula)
    dPdT = pr.dPdT(temp, press, Tc, Pc, Acentric)[0]
    dVdP = 1 / pr.dPdV(temp, press, Tc, Pc, Acentric)[0]

    return Cp + temp * dVdP * dPdT**2
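The return line is the classical relation between the two heat capacities; since (∂V/∂P)_T < 0 for a stable phase, the correction is negative and Cv < Cp, as expected:

    C_v = C_p + T \left( \frac{\partial V}{\partial P} \right)_T \left( \frac{\partial P}{\partial T} \right)_V^{2}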
Example no. 16
def cp(temp, substance=None, formula=None):
    """
    Calculates the constant pressure heat capacity of a substance.
    Cp = A + B/1e2 T + C/1e5 T^2 + D/1e9 T^3

    :param substance: The substance
    :type substance: str
    :param formula: The substance formula
    :type formula: str
    :param temp: The substance temperature (K)
    :type temp: float
    :return: cp (J/mol-K)
    :rtype: float
    """
    if substance is None and formula is None:
        t = '{:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now())
        logging.error('{0} Heat Capacity: No name or formula input.'.format(t))
        raise ValueError

    const = read.get_heat_capacity_constants(name=substance, formula=formula)
    A, B, C, D = const[0], const[1], const[2], const[3]
    return A + B * 1e-2 * temp + C * 1e-5 * temp**2 + D * 1e-9 * temp**3
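An equivalent formulation of the polynomial with numpy, shown as a sketch (assumes const holds (A, B, C, D) as in the docstring):

import numpy as np

def cp_poly(temp, const):
    # Coefficients in descending powers of T, with the docstring's scalings.
    A, B, C, D = const[:4]
    return np.polyval([D * 1e-9, C * 1e-5, B * 1e-2, A], temp)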
Example no. 17
def token_schema_from_correlations(argv):
    corrFileName = fn.create_correlations_name(argv)
    corrTups = read.token_correlations(corrFileName)
    if corrTups is None:
        print("Correlations file not found")
        print("Create correlations file with 'findCorrelations.py'")
        exit()

    corIdx = argv.index('-corr')
    minCount = int(argv[corIdx + 1])
    minScore = float(argv[corIdx + 2])

    # Keeps only pairs seen at least minCount times with score >= |minScore|;
    # strongly negative correlation scores are filtered out.
    reducedTups = [(cor[0], cor[1]) for cor in corrTups
                   if cor[2] >= minCount and cor[3] >= abs(minScore)]

    tokenSet = set()
    for tok1, tok2 in reducedTups:
        tokenSet.add(tok1)
        tokenSet.add(tok2)

    tokenSchema = list(tokenSet)
    tokenSchema.sort()
    return tokenSchema
Example no. 18
def grab_prof_token_dict(stopwords, stmr):

    filename = fn.create_prof_token_dict_name(sys.argv)

    if os.path.exists(filename):
        with open(filename, 'rb') as f:
            profTokenDict = pickle.load(f)
        return profTokenDict

    token_f = lambda t: count.create_single_tokens(t, stopwords, stmr)
    if '-tup' in sys.argv:
        token_f = lambda t: count.create_tuple_tokens(t, stopwords, stmr)
    elif '-stup' in sys.argv:
        token_f = (
            lambda t: count.create_single_tuple_tokens(t, stopwords, stmr))

    profs = read.prof_dicts()

    profTokenDict = count.create_prof_token_dict(profs, token_f)

    with open(filename, 'wb') as f:
        pickle.dump(profTokenDict, f)

    return profTokenDict
Example no. 19
def setup_logger(name, log_file, level=logging.INFO):  # header reconstructed from Example no. 25; default level is an assumption
    handler = logging.FileHandler(log_file)
    handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
    logger = logging.getLogger(name)
    logger.setLevel(level)
    logger.addHandler(handler)
    return logger


# Setup Log Files
root_path = os.path.dirname(os.path.realpath(__file__))
runlog = setup_logger('runlog',
                      root_path + '/Logs/run.log',
                      level=logging.DEBUG)
alglog = setup_logger('alglog', root_path + '/Logs/alg.log')

runlog.info('START Decline Curve Analysis.')

df = read.production_monthyear(root_path + '/Data/spindletop.csv')
prod = read.production_by_month(df)

prod1 = np.extract(
    np.extract(prod[0] < 1926, prod) > 1902.6,
    np.extract(prod[0] < 1926, prod[1]))
time1 = [
    i - 1902.6 + 1e-9 for i in np.extract(
        np.extract(prod[0] < 1926, prod) > 1902.6,
        np.extract(prod[0] < 1926, prod[0]))
]
time1 = np.extract(~np.isnan(prod1), time1)
prod1 = np.extract(~np.isnan(prod1), prod1)
prod2 = np.extract(prod[0] > 1926.5, prod[1])
time2 = np.extract(prod[0] > 1926.5, prod[0]) - 1926.5
time2[0] = 1e-8
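The nested np.extract calls above are hard to read; an equivalent formulation with boolean masks, assuming prod is a 2xN array of (year, production) rows (a sketch, not the original code):

sel1 = (prod[0] > 1902.6) & (prod[0] < 1926)
prod1_alt = prod[1][sel1]
time1_alt = prod[0][sel1] - 1902.6 + 1e-9
keep = ~np.isnan(prod1_alt)
prod1_alt, time1_alt = prod1_alt[keep], time1_alt[keep]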
Example no. 20
def main():

    if '-h' in sys.argv:
        print_usage_message()
        exit()

    vectorFileName = fn.create_prof_vect_name(sys.argv, True)
    tokenVects = read.word_vects(vectorFileName)
    if tokenVects is None:
        print("Could not find token vects")
        print("Use 'createProfVectors.py' to create vectors")
        exit()

    tokenVects = vp.process_token_vectors(tokenVects, sys.argv)

    if '-d' in sys.argv:
        ratings = read.difficulty_rating_vect(vectorFileName)
    else:
        ratings = read.overall_rating_vect(vectorFileName)

    # Create Training and validation sets
    pidVect = read.pid_vect(vectorFileName)

    nonSingleSmallIdxs = ffnn.non_single_small_idxs(pidVect)
    singleIdxs = vp.pids_to_idxs(pidVect, read.pids_file(fn.PidsSingleRevFile))
    smallIdxs = vp.pids_to_idxs(pidVect,
                                read.pids_file(fn.PidsSmallRevLenFile))
    singleSmallIdxs = list(set(singleIdxs).union(set(smallIdxs)))
    singleSmallIdxs.sort()
    singleSmallIdxs = np.array(singleSmallIdxs)

    trainingVects = tokenVects[nonSingleSmallIdxs, :]
    trainingRatings = ratings[nonSingleSmallIdxs]

    validVects = tokenVects[singleSmallIdxs, :]
    validRatings = ratings[singleSmallIdxs]

    print(trainingVects.shape, trainingRatings.shape, validVects.shape,
          validRatings.shape)
    """
  
   xTrain, xValid, yTrain, yValid = train_test_split(tokenVects, ratings,
                                                      test_size=0.3)
   """
    # Select and train model
    if '-deep' in sys.argv:
        model = ffnn.deep_model(tokenVects.shape[1])
    else:
        model = ffnn.shallow_model(tokenVects.shape[1])

    history = model.fit(trainingVects,
                        trainingRatings,
                        epochs=10,
                        batch_size=5,
                        validation_data=(validVects, validRatings))

    plotTitle = plot.ffnn_error_title(sys.argv)
    outfile = None
    if '-save' in sys.argv:
        outfile = fn.create_ffnn_plot_name(sys.argv)

    plot.ffnn_error(history, title=plotTitle, filename=outfile)
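The ffnn model builders are not part of this listing; a minimal Keras sketch consistent with the fit() call above (layer size and optimizer are assumptions):

import tensorflow as tf

def shallow_model(inputDim):
    # One hidden layer regressing a single rating value.
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(64, activation='relu', input_shape=(inputDim,)),
        tf.keras.layers.Dense(1),
    ])
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model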
Example no. 21
#!/usr/bin/python

import ReadFromFile
import updateAuthorIdinPaperAuthor
import InsertIntoAuthorAuthor

ReadFromFile.do()
updateAuthorIdinPaperAuthor.do()
InsertIntoAuthorAuthor.do()
Example no. 22
        #print(item_cost)  # monthly cost per sub-product
    #print(dict_item_cost)

    list_year_month = sorted(set(list_year_month))
    list_subproject = sorted(set(list_subproject))
    #print(dict_item_cost)
    #print(list_year_month)
    #print(list_subproject)
    #ReadFromFile.WriteToFile.write_statistics_data(sheet_data, list_subproject, list_year_month, dict_item_cost)
    return list_subproject, list_year_month, dict_item_cost


sheet_data = '正式账户'
list_subproject_statistics, list_year_month_statistics, dict_statistics_data = ReadFromFile.read_statistics_data(sheet_data)
list_subproject_preparative, list_year_month_preparative, dict_preparative_data = get_data_list_preparative(sheet_data)
# Merge and deduplicate the sub-project names and months from both sources.
list_subproject_preparative.extend(list_subproject_statistics)
list_subproject = sorted(set(list_subproject_preparative))
list_year_month_preparative.extend(list_year_month_statistics)
list_year_month = sorted(set(list_year_month_preparative))
for preparative_key, preparative_value in dict_preparative_data.items():
    print(preparative_key in dict_statistics_data.keys())
    # .get avoids a KeyError when the key is missing from the statistics dict.
    if dict_statistics_data.get(preparative_key) is not None:
        print(preparative_value)
        for preparative_value_key, preparative_value_value in preparative_value.items():
            print(preparative_value_key in dict_statistics_data[preparative_key].keys())
##            if (preparative_value_key in dict_statistics_data[preparative_key].keys()):
##                if dict_statistics_data[preparative_key][preparative_value_key] is not None:
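One plausible completion of the commented-out merge above (an assumption, not the author's code): fold each preparative month's cost into the statistics dict, creating missing entries as needed.

for p_key, p_val in dict_preparative_data.items():
    # Existing values may be None, so fall back to a fresh dict.
    stats = dict_statistics_data.get(p_key) or {}
    for ym, cost in p_val.items():
        stats[ym] = (stats.get(ym) or 0) + cost
    dict_statistics_data[p_key] = stats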
Example no. 23
def main():

    if '-h' in sys.argv:
        print_usage_message()
        exit()

    profDicts = read.prof_dicts()

    revLens = st.rev_len_arr(profDicts)
    print("Number of Reviews:", revLens.shape[0])
    print("Mean review length:", revLens.mean())
    print("Std Dev review length:", revLens.std())
    print()

    numRevsProf = st.num_revs_profs(profDicts)
    print("Number of professors:", numRevsProf.shape[0])
    print("Mean num reviews per prof:", numRevsProf.mean())
    print("Std Dev num revews per prof:", numRevsProf.std())
    print()

    profRevLen = st.profs_revs_len(profDicts)
    print("Mean tokens per prof:", profRevLen.mean())
    print("Std Dev tokens per prof:", profRevLen.std())
    print()

    overRats = np.array([prof['rating_overall'] for prof in profDicts],
                        dtype=float)
    diffRats = np.array([prof['rating_difficulty'] for prof in profDicts],
                        dtype=float)

    overRatMean = overRats.mean()
    diffRatMean = diffRats.mean()

    print("Overall ratings mean:", overRatMean)
    print("Overall ratings std dev:", overRats.std())
    print("Difficulty ratings mean:", diffRatMean)
    print("Difficulty ratings std dev:", diffRats.std())
    print()

    overMeanDiff = overRats - overRatMean
    overMeanDiff = np.abs(overMeanDiff)
    diffMeanDiff = diffRats - diffRatMean
    diffMeanDiff = np.abs(diffMeanDiff)

    print("Nieve approach to prediction: Guessing the Mean")
    print("All profs")
    print("Overall absolute error mean:", overMeanDiff.mean())
    print("Overall absolute error std div:", overMeanDiff.std())
    print("Difficulty absolute error mean:", diffMeanDiff.mean())
    print("Difficulty absolute error std div:", diffMeanDiff.std())
    print()

    oneRevPids = set(read.pids_file(fn.PidsSingleRevFile))
    oneOverRats = np.array([
        prof['rating_overall']
        for prof in profDicts if prof['pid'] in oneRevPids
    ],
                           dtype=float)
    oneOverDiff = np.abs(oneOverRats - oneOverRats.mean())

    print("Profs with one review")
    print("One review absolute error mean:", oneOverDiff.mean())
    print("One review absolute error std div:", oneOverDiff.std())
    print()

    smallRevPids = set(read.pids_file(fn.PidsSmallRevLenFile))
    smallOverRats = np.array([
        prof['rating_overall']
        for prof in profDicts if prof['pid'] in smallRevPids
    ],
                             dtype=float)
    smallOverDiff = np.abs(smallOverRats - smallOverRats.mean())

    print("Profs with short reviews")
    print("Small review absolute error mean:", smallOverDiff.mean())
    print("small review absolute error std div:", smallOverDiff.std())
    print()

    save = False
    if '-save' in sys.argv:
        save = True

    plot.plot_word_review_count(revLens, profRevLen, numRevsProf, save=save)
Example no. 24
def main():

    if '-h' in sys.argv:
        print_usage_message()
        exit()

    vectFileName = fn.create_prof_vect_name(sys.argv, True)
    simMatFileName = fn.create_sim_mat_name(sys.argv)
    predsFileName = fn.create_preds_name(sys.argv)

    print(vectFileName)
    print(simMatFileName)
    print(predsFileName)

    # Grab the ratings vector
    if '-d' in sys.argv:
        ratings = read.difficulty_rating_vect(vectFileName)
    else:
        ratings = read.overall_rating_vect(vectFileName)

    # Assign similarity metric
    sim_f = vp.inverse_euclidean_distance
    if '-cos' in sys.argv:
        sim_f = vp.cosine_similarity
    elif '-pear' in sys.argv:
        sim_f = vp.abs_pearson_correlation

    # Set if weighted or not
    weighted = True
    if '-unweighted' in sys.argv:
        weighted = False

    # Grab predictions or create them if not available
    predictions = read.knn_predictions(predsFileName)
    if predictions is None:

        simMat = read.similarity_matrix(simMatFileName)
        if simMat is None:
            wordVects = read.word_vects(vectFileName)
            if wordVects is None:
                print("Vector file " + vectFileName + " does not exist")
                exit()
            wordVects = vp.process_token_vectors(wordVects, sys.argv)
            simMat = knn.get_similarity_matrix(wordVects, sim_f)
            write.similarity_matrix(simMat, simMatFileName)

        predictions = knn.knn_dataset(ratings, MaxK, simMat, weighted)
        write.knn_predictions(predictions, predsFileName)

    if '-maxK' in sys.argv:
        maxK = int(sys.argv[sys.argv.index('-maxK') + 1])
        predictions = predictions[:, :maxK]

    pidVect = read.pid_vect(vectFileName)
    singleRevIdxs = vp.pids_to_idxs(pidVect,
                                    read.pids_file(fn.PidsSingleRevFile))
    smallLenIdxs = vp.pids_to_idxs(pidVect,
                                   read.pids_file(fn.PidsSmallRevLenFile))

    plotFileName = None
    if '-save' in sys.argv:
        plotFileName = fn.create_knn_accuracy_plot_name(sys.argv)

    # Output results of the run
    plot.knn_error(
        predictions,
        ratings,
        title=plot.create_knn_error_title(sys.argv),
        idxToPlot=[singleRevIdxs, smallLenIdxs],
        subTitles=[
            "Error with profs with one review",
            "Error with profs with aggregate review lengths "
            "one std dev above the mean review length or less"
        ],
        saveFile=plotFileName)
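For reference, a hedged sketch of the similarity-weighted kNN prediction that knn_dataset presumably performs per professor (the real knn module is not shown; names are assumptions):

import numpy as np

def knn_predict_one(ratings, simRow, k, weighted=True):
    # simRow holds similarities to every other professor (self excluded).
    idxs = np.argsort(simRow)[::-1][:k]        # k most similar neighbors
    if not weighted:
        return ratings[idxs].mean()
    w = simRow[idxs]
    return np.dot(w, ratings[idxs]) / w.sum()  # similarity-weighted mean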
Example no. 25
def setup_logger(name, log_file, level=logging.INFO):  # header reconstructed; default level is an assumption
    handler = logging.FileHandler(log_file)
    handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
    logger = logging.getLogger(name)
    logger.setLevel(level)
    logger.addHandler(handler)
    return logger


# Setup Log Files
root_path = os.path.dirname(os.path.realpath(__file__))
runlog = setup_logger('runlog', root_path + '/Logs/run.log', level=logging.DEBUG)
alglog = setup_logger('alglog', root_path + '/Logs/alg.log')

runlog.info('START Thermodynamic Analysis of Multi-Phase Petroleum Fluids.')

names, number, fractions = read.scn_composition(root_path + '/Data/composition.txt')
# delta = read.get_binary_interations(scn_list=number)

mw_raw = list()
for n in range(len(number)):
    mw_raw.append(read.get_phase_change_data(scn=number[n])[0])

mw30 = list()
for n in range(30):
    mw30.append(read.get_phase_change_data(scn=n + 1)[0])

zc7p = scn.z_c7plus(number, fractions)
Mc7p = scn.MW_c7plus(number, fractions)
gc7p = scn.specific_gravity_c7plus(number, fractions)

fraction7 = list()