Example #1
 def minimumTotal3(self, triangle):
     if not triangle:
         return
     for i in xrange(len(triangle) - 2, -1, -1):
         for j in xrange(len(triangle[i])):
             triangle[i][j] += min(triangle[i + 1][j],
                                   triangle[i + 1][j + 1])
     return triangle[0][0]
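A minimal usage sketch for the bottom-up variant above (Python 2, since xrange is used; the method is shown outside its class, so a hypothetical Solution wrapper is assumed):

# triangle = [[2], [3, 4], [6, 5, 7], [4, 1, 8, 3]]
# Solution().minimumTotal3(triangle)  # -> 11, via the path 2 + 3 + 5 + 1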
Example #2
 def minimumTotal(self, triangle):
     if not triangle:
         return
     res = triangle[-1]
     for i in xrange(len(triangle) - 2, -1, -1):
         for j in xrange(len(triangle[i])):
             res[j] = min(res[j], res[j + 1]) + triangle[i][j]
     return res[0]
Example #3
def main(true, pred):
    correct = 0.0  # renamed from `sum` to avoid shadowing the built-in
    a = confuse(true, pred)  # confusion matrix: rows = true, columns = predicted
    print(a)
    # plot_confusion_matrix(a)
    for i in xrange(len(a)):
        correct += a[i][i]  # correct predictions lie on the diagonal

    print("Accuracy : ", (correct / len(true)) * 100)
Example #4
def multiple_correlation():
    # read the file once and pull each column out as a 1-D Series,
    # which is what scipy.stats.pearsonr expects
    df = pd.read_csv('Data/Original-Data/Original_Combine.csv')
    A = df['PM 2.5']
    B = df['T']
    C = df['TM']
    D = df['Tm']
    E = df['SLP']
    F = df['H']
    G = df['VV']
    H = df['VM']
    I = df['V']

    coerr = [B, C, D, E, F, G, H, I]

    myfinalcorr = []

    for i in xrange(len(coerr)):
        mycorr = []
        for j in xrange(len(coerr)):
            if i == j:
                # identical predictors would zero the denominator below;
                # fall back to the simple correlation with A
                mycorr.append(abs(pearsonr(A, coerr[i])[0]))
                continue
            corr1 = pearsonr(A, coerr[i])
            corr2 = pearsonr(A, coerr[j])
            corr3 = pearsonr(coerr[i], coerr[j])
            # multiple correlation of A on the predictor pair (i, j)
            corr = math.sqrt((math.pow(corr1[0], 2) + math.pow(corr2[0], 2)
                              - (2 * corr1[0] * corr2[0] * corr3[0]))
                             / (1 - math.pow(corr3[0], 2)))
            mycorr.append(corr)
        print(mycorr)
        myfinalcorr.append(mycorr)

    # print max(myfinalcorr)

    a = 0.0
    for i in myfinalcorr:
        for j in i:
            if a < j:
                a = j

    print("Max Correlation and Position: ",a, index_2d(myfinalcorr, a))

    tick_marks = np.arange(8)
    h = sns.heatmap(myfinalcorr, square=True)
    plt.yticks(tick_marks, ['T', 'TM', 'Tm', 'SLP', 'H', 'VV', 'VM', 'V'], rotation=45)
    plt.xticks(tick_marks, ['T', 'TM', 'Tm', 'SLP', 'H', 'VV', 'VM', 'V'], ha='left', rotation=45)
    plt.show()
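For reference, the expression inside the loop is the multiple correlation coefficient R of one variable on two predictors. A self-contained sketch on synthetic data (names here are illustrative):

import math
import numpy as np
from scipy.stats import pearsonr

# illustrative data: y depends on both x1 and x2
rng = np.random.RandomState(0)
x1 = rng.randn(200)
x2 = rng.randn(200)
y = 2 * x1 + x2 + 0.1 * rng.randn(200)

r_y1 = pearsonr(y, x1)[0]
r_y2 = pearsonr(y, x2)[0]
r_12 = pearsonr(x1, x2)[0]

# multiple correlation of y on the pair (x1, x2)
R = math.sqrt((r_y1 ** 2 + r_y2 ** 2 - 2 * r_y1 * r_y2 * r_12)
              / (1 - r_12 ** 2))
print(R)  # close to 1.0 for this nearly noise-free relationship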
Example #5
 def minimumTotal_2(self, triangle):
     if not triangle:
         return
     for i in xrange(1, len(triangle)):
         for j in xrange(len(triangle[i])):
             if j == 0:
                 triangle[i][j] += triangle[i - 1][j]
             elif j == len(triangle[i]) - 1:
                 triangle[i][j] += triangle[i - 1][j - 1]
             else:
                 triangle[i][j] += min(triangle[i - 1][j - 1],
                                       triangle[i - 1][j])
     return min(triangle[-1])
Example #6
def predict_sequences_multiple(model, data, window_size, prediction_len):
    # Predict `prediction_len` steps, then shift the prediction run forward by `prediction_len` steps
    prediction_seqs = []
    for i in xrange(len(data) // prediction_len):
        curr_frame = data[i * prediction_len]
        predicted = []
        for j in xrange(prediction_len):
            predicted.append(model.predict(curr_frame[newaxis, :, :])[0, 0])
            curr_frame = curr_frame[1:]
            curr_frame = np.insert(curr_frame, [window_size - 1],
                                   predicted[-1],
                                   axis=0)
        prediction_seqs.append(predicted)
    return prediction_seqs
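The two array operations inside the inner loop implement a sliding window: drop the oldest timestep, append the newest prediction. A standalone sketch of that update (illustrative shapes):

import numpy as np

frame = np.arange(10, dtype=float).reshape(5, 2)  # (window=5, features=2)
new_step = np.array([99.0, 98.0])
frame = frame[1:]                                             # drop the oldest timestep
frame = np.insert(frame, [frame.shape[0]], new_step, axis=0)  # append the newest
print(frame.shape)  # still (5, 2)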
Example #7
def sort_type1(request, file_path):
    global sorted_files  # renamed: rebinding the built-in name `sorted` would break the other branches
    if (request == "file size"):
        """ Return list of file paths in directory sorted by file size """
        root = tk.Tk()
        root.title('Sort by Size')
        # Get list of files
        filepaths = []
        for basename in os1.listdir(file_path):
            filename = os1.path.join(file_path, basename)
            if os1.path.isfile(filename):
                filepaths.append(filename)

        # Re-populate list with filename, size tuples
        for i in xrange(len(filepaths)):
            filepaths[i] = (filepaths[i], os1.path.getsize(filepaths[i]))

        filepaths.sort(key=lambda filename: filename[1], reverse=True)

        # Re-populate list with just filenames
        for i in xrange(len(filepaths)):
            filepaths[i] = filepaths[i][0]
        file = ""
        for i in filepaths:
            file = file + '\n' + i
        tk.Label(root, text=file).pack()
        tk.Button(root, text='Ok', command=root.destroy).pack()
        root.mainloop()

    elif (request == "last modified"):
        root = tk.Tk()
        root.title('Sort by Last Modified')
        paths = sorted(Path(file_path).iterdir(), key=os.path.getmtime)
        file = ""
        for i in paths:
            file = file + '\n' + str(i)
        tk.Label(root, text=file).pack()
        tk.Button(root, text='Ok', command=root.destroy).pack()
        root.mainloop()

    elif (request == "Name"):
        root = tk.Tk()
        root.title('Sort by Size')
        sorted = sorted(os1.listdir(file_path))
        files = ""
        for file in sorted:
            files = files + "\n" + file
        tk.Label(root, text=files).pack()
        tk.Button(root, text='Ok', command=root.destroy).pack()
        root.mainloop()
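The size-sorting branch can be condensed into a single sorted call; a sketch of the same idea (using the standard os module directly):

import os

def files_by_size(dir_path):
    # absolute paths of regular files, largest first
    paths = [os.path.join(dir_path, name) for name in os.listdir(dir_path)]
    paths = [p for p in paths if os.path.isfile(p)]
    return sorted(paths, key=os.path.getsize, reverse=True)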
Example #8
 def minimumTotal_1(self, triangle):
     if not triangle:
         return
     res = [[0 for i in xrange(len(row))] for row in triangle]
     res[0][0] = triangle[0][0]
     for i in xrange(1, len(triangle)):
         for j in xrange(len(triangle[i])):
             if j == 0:
                 res[i][j] = res[i - 1][j] + triangle[i][j]
             elif j == len(triangle[i]) - 1:
                 res[i][j] = res[i - 1][j - 1] + triangle[i][j]
             else:
                 res[i][j] = min(res[i - 1][j - 1],
                                 res[i - 1][j]) + triangle[i][j]
     return min(res[-1])
Example #9
def upload():
    uploadFile = request.files['data_file']
    res = "OK"
    if not uploadFile:
        res = "No file"
        # bail out early; pd.read_csv would otherwise fail on the missing file
        return app.response_class(response=json.dumps(res),
                                  status=400,
                                  mimetype='application/json')
    colMap = parse_multi_form(request.form)['col']
    df = pd.read_csv(uploadFile, encoding='windows-1252')
    df.drop(df.tail(2).index, inplace=True)
    data = df.to_dict()
    Session = scoped_session(session_factory)
    session = Session()
    length = len(list(data.values())[0])
    for i in xrange(length):
        valueMapping = {}
        for col in colMap:
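            # NaN check: NaN != NaN, so "x == x" is True only for present values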
            valueMapping[col] = str(data[colMap[col]][i]) if data[
                colMap[col]][i] == data[colMap[col]][i] else None
            if col == 'custom_label' and data[colMap[col]][i] == data[
                    colMap[col]][i]:
                valueMapping['store_id'] = data[colMap[col]][i].split("-")[0]
        ins = insert(Store)
        ins = ins.values(valueMapping)
        session.execute(ins)
    session.commit()
    response = app.response_class(response=json.dumps(res),
                                  status=200,
                                  mimetype='application/json')
    return response
Example #10
def count_for_cates(trainText):
    '''
        Extract feature words:
        count how often every word appears in each class,
        and the total number of words in each class.
    '''
    docCount = [0] * len(lables)
    wordCount = collections.defaultdict(doc_dict)
    # computation
    for line in trainText:
        lable, text = line.strip().split(' ', 1)
        index = lable2id(lable)
        words = text.split(' ')
        for word in words:
            wordCount[word][index] += 1
            docCount[index] += 1
    # print(docCount)
    # print(wordCount)

    # calculate mutual information
    miDict = collections.defaultdict(doc_dict)
    N = sum(docCount)
    for k, vs in wordCount.items():
        for i in xrange(len(vs)):
            N11 = vs[i]
            N10 = sum(vs) - N11
            N01 = docCount[i] - N11
            N00 = N - N11 - N10 - N01
            mi = mutual_info(N, N11, N10 + N11, N01 + N11) + mutual_info(
                N, N10, N10 + N11, N00 + N10) + mutual_info(
                    N, N01, N01 + N11, N01 + N00) + mutual_info(
                        N, N00, N00 + N10, N00 + N01)
            miDict[k][i] = mi
    # print(miDict)

    with open('featureFile.csv', 'w') as f2:
        writer2 = csv.writer(f2)
        writer2.writerow(docCount)
        fWords = set()
        for i in xrange(len(docCount)):
            keyf = lambda x: x[1][i]
            sortedDict = sorted(miDict.items(), key=keyf, reverse=True)
            # keep the top 5 * num words per class as feature words
            for j in xrange(5 * num):
                fWords.add(sortedDict[j][0])
        writer2.writerow(fWords)
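The helper mutual_info is not shown in the snippet. Judging from the call pattern mutual_info(N, N11, N10 + N11, N01 + N11), a common definition for one cell of the 2x2 contingency table would be the following (an assumption, not necessarily the project's helper):

import math

def mutual_info(N, Nxy, Nx, Ny):
    # pointwise term of mutual information for one contingency-table cell;
    # Nxy: joint count, Nx / Ny: marginal counts, N: total count
    if Nxy == 0:
        return 0.0
    return (Nxy / float(N)) * math.log((N * Nxy) / float(Nx * Ny), 2)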
Example #11
def test_dictionary_access():
    """
    tint as key of dictionary
    :return:
    """

    reset_comparisons()
    assert len(Comparisons) == 0

    key1 = tint(10)
    key2 = tint(123)
    key3 = tint(109324)

    value1 = random.sample(xrange(100), 10)
    value2 = random.sample(xrange(100), 10)
    value3 = random.sample(xrange(100), 10)

    dictionary = {key1: value1, key2: value2, key3: value3}

    # check 'in' operator with list
    l = [1, 2, 3]
    assert tint(2) in l  # works

    # test access via index
    assert dictionary[key1]
    assert dictionary[key2]
    assert dictionary[10]
    assert dictionary[tint(10)]

    # dictionary.keys() returns a set-like 'dict_keys' view
    assert key1 in dictionary.keys()
    assert key2 in dictionary.keys()
    assert key3 in dictionary.keys()
    assert 10 in dictionary.keys()
    assert tint(10) in dictionary.keys()
    assert tint(123) in dictionary.keys()
    assert tint(109324) in dictionary.keys()

    # tint has to be hashable
    for key in dictionary.keys():
        assert isinstance(key, tint)
        assert key.has_taint()

    for key, value in dictionary.items():
        assert isinstance(key, tint)
        assert key.has_taint()
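For the lookups above (dictionary[10] finding a tint key, and vice versa) to succeed, tint must hash and compare like the int it wraps. A minimal sketch of that contract (hypothetical, not the library's implementation):

class TaintedInt(int):
    # an int subclass inherits __eq__ and __hash__ from int, so
    # TaintedInt(10) == 10 and hash(TaintedInt(10)) == hash(10)
    def has_taint(self):
        return True

d = {TaintedInt(10): 'a'}
assert d[10] == 'a'         # a plain int finds the tainted key
assert TaintedInt(10) in d  # and the tainted int finds itself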
Example #12
    def create_connection(self, params):
        self.logger.info('Attempting to run create_connection_guacamole')

        guacamole_username = params["guacamole_username"]
        guacamole_password = params["guacamole_password"]
        guacamole_connection_name = params["guacamole_connection_name"]
        guacamole_protocol = params["guacamole_protocol"]
        vnc_password = params["vnc_password"]
        container_ip = params["container_ip"]
        container_port = params["container_port"]

        try:
            salt = bytearray(random.getrandbits(8) for _ in xrange(32))
            salt_hex = ''.join('{:02X}'.format(x) for x in salt)
            password_hash = hashlib.sha256(
                (guacamole_password + salt_hex).encode('utf-8')).digest()

            # delete guacamole user
            GuacamoleUser.objects.using('guacamole').filter(
                username=guacamole_username).delete()

            # create new connection
            connection = GuacamoleConnection.objects.using('guacamole').create(
                connection_name=guacamole_connection_name,
                protocol=guacamole_protocol,
                failover_only=0)

            user = GuacamoleUser.objects.using('guacamole').create(
                username=guacamole_username,
                password_salt=salt,
                password_hash=password_hash,
                password_date=timezone.now(),
                disabled=0,
                expired=0)

            GuacamoleConnectionPermission.objects.using('guacamole').create(
                user=user, connection=connection, permission='READ')

            GuacamoleConnectionParameter.objects.using('guacamole').create(
                connection=connection,
                parameter_name="hostname",
                parameter_value=container_ip)

            GuacamoleConnectionParameter.objects.using('guacamole').create(
                connection=connection,
                parameter_name="port",
                parameter_value=container_port)

            GuacamoleConnectionParameter.objects.using('guacamole').create(
                connection=connection,
                parameter_name="password",
                parameter_value=vnc_password)

            return "ok"

        except Exception as e:
            return e
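A standalone sketch of the salt-and-hash scheme used above, which stores SHA-256 over the password concatenated with the salt's uppercase hex:

import hashlib
import random

# same scheme as above; for real deployments, os.urandom is a stronger
# salt source than random.getrandbits
salt = bytearray(random.getrandbits(8) for _ in range(32))
salt_hex = ''.join('{:02X}'.format(x) for x in salt)
password_hash = hashlib.sha256(('secret' + salt_hex).encode('utf-8')).digest()
print(len(salt), len(password_hash))  # 32-byte salt, 32-byte digest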
Example #13
def is_prime(num):
    if num == 2:
        return True
    if num < 2 or num % 2 == 0:
        return False
    for n in xrange(3, int(num**0.5) + 2, 2):
        if num % n == 0:
            return False
    return True
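A quick sanity check (xrange as in the rest of these examples; use range on Python 3):

print([n for n in xrange(2, 30) if is_prime(n)])
# -> [2, 3, 5, 7, 11, 13, 17, 19, 23, 29]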
Example #14
def count_for_cates(trainText):

    pplCount = [0] * len(lables)
    cateCount = collections.defaultdict(doc_dict)
    # computation
    for line in trainText:
        lable = line[0]
        fileCategory = line[1]
        index = lable2id(lable)
        # count of this fileCategory reviewed by each person
        cateCount[fileCategory][index] += 1
        # total number of fileCategory entries reviewed by each person
        pplCount[index] += 1
    # print(pplCount)
    # print(cateCount)

    # calculate mutual information
    # the relevant degree of fileCategory to ppl
    miDict = collections.defaultdict(doc_dict)
    N = sum(pplCount)
    for k, vs in cateCount.items():
        for i in xrange(len(vs)):
            N11 = vs[i]
            N10 = sum(vs) - N11
            N01 = pplCount[i] - N11
            N00 = N - N11 - N10 - N01
            mi = mutual_info(N, N11, N10 + N11, N01 + N11) + mutual_info(
                N, N10, N10 + N11, N00 + N10) + mutual_info(
                    N, N01, N01 + N11, N01 + N00) + mutual_info(
                        N, N00, N00 + N10, N00 + N01)
            miDict[k][i] = mi
    # print(miDict)

    with open('Rec_featureFile.csv', 'w') as f2:
        writer2 = csv.writer(f2)
        writer2.writerow(pplCount)
        fWords = set()
        for i in xrange(len(pplCount)):
            keyf = lambda x: x[1][i]
            sortedDict = sorted(miDict.items(), key=keyf, reverse=True)
            # 10 * number of labels: the fileCategory values most relevant to each person
            for j in xrange(30):
                fWords.add(sortedDict[j][0])
        writer2.writerow(fWords)
Example #15
def plot_results_multiple(predicted_data, true_data, prediction_len):
    fig = plt.figure(facecolor='white')
    ax = fig.add_subplot(111)
    ax.plot(true_data, label='True Data')
    # Pad the list of predictions to shift it to its correct start in the graph
    for i, data in enumerate(predicted_data):
        padding = [None for p in xrange(i * prediction_len)]
        plt.plot(padding + data, label='Prediction')
        plt.legend()
    plt.show()
Example #16
def predict_sequence_full(model, data, window_size):
    # Shift the window by 1 new prediction each time, re-run predictions on new window
    curr_frame = data[0]
    predicted = []
    for i in xrange(len(data)):
        predicted.append(model.predict(curr_frame[newaxis, :, :])[0, 0])
        curr_frame = curr_frame[1:]
        curr_frame = np.insert(curr_frame, [window_size - 1],
                               predicted[-1],
                               axis=0)
    return predicted
Example #17
 def correction_score(self, words_count, old_sentence, new_sentence):
     # """
     #   Take a old sentence and a new sentence, for each words in the new sentence, if it's same as the orginal sentence, assign 0.95 prob
     #   If it's not same as original sentence, give 0.05 / (count(similarword) - 1)
     # """
     score = 1
     for i in xrange(len(new_sentence)):
         if new_sentence[i] in words_count:
             score *= 0.95
         else:
             score *= (0.05 / (words_count[old_sentence[i]] - 1))
     return math.log(score)
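Multiplying many probabilities below 1 can underflow to 0.0 before the final math.log. Accumulating in log space is the usual fix; a sketch with the same branch logic as above:

 def correction_score_logsum(self, words_count, old_sentence, new_sentence):
     # same scoring as correction_score, accumulated in log space to avoid underflow
     log_score = 0.0
     for i in xrange(len(new_sentence)):
         if new_sentence[i] in words_count:
             log_score += math.log(0.95)
         else:
             log_score += math.log(0.05 / (words_count[old_sentence[i]] - 1))
     return log_score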
Example #18
def gradient_descent_2(alpha, x, y, numIterations):
    m = x.shape[0]  # number of samples(rows)
    theta = np.ones(2)
    x_transpose = x.transpose()
    for it in xrange(0, numIterations):  # `it`, not `iter`: avoid shadowing the built-in
        hypothesis = np.dot(x, theta)
        loss = hypothesis - y
        J = np.sum(loss**2) / (2 * m)  # cost
        print("iter %s | J: %.3f" % (it, J))
        gradient = np.dot(x_transpose, loss) / m
        theta = theta - alpha * gradient  # update
    return theta
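A quick end-to-end check on synthetic data (illustrative values; the design matrix needs a bias column because theta has two components, and the function prints the cost every iteration):

import numpy as np

x = np.c_[np.ones(100), np.linspace(0, 1, 100)]  # bias column + one scaled feature
y = 3.0 + 2.0 * x[:, 1]                          # true theta is (3, 2)
theta = gradient_descent_2(0.5, x, y, 2000)
print(theta)  # converges toward [3., 2.]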
Example #19
    def post_process(self, before):
        after = before

        # Add a 'timestamp' field when an entry has a 'datetime' but no timestamp yet
        if ('return' in after):
            if (isinstance(after['return'], list)):
                for x in xrange(0, len(after['return'])):
                    if (isinstance(after['return'][x], dict)):
                        if ('datetime' in after['return'][x] and 'timestamp' not in after['return'][x]):
                            after['return'][x]['timestamp'] = float(createTimeStamp(after['return'][x]['datetime']))

        return after
Example #20
    def __init__(self, root,
                 train=True,
                 transform=None,
                 target_transform=None,
                 download=False, classes=None):
        super(iCIFAR10, self).__init__(root,
                                       train=train,
                                       classes=classes,
                                       transform=transform,
                                       target_transform=target_transform,
                                       download=download)

        # Select subset of classes
        if self.train:
            train_data = []
            train_labels = []

            for i in xrange(len(self.train_data)):
                if self.train_labels[i] in classes:
                    train_data.append(self.train_data[i])
                    train_labels.append(self.train_labels[i])

            self.train_data = np.array(train_data)
            self.train_labels = train_labels

        else:
            test_data = []
            test_labels = []

            for i in xrange(len(self.test_data)):
                if self.test_labels[i] in classes:
                    test_data.append(self.test_data[i])
                    test_labels.append(self.test_labels[i])

            self.test_data = np.array(test_data)
            self.test_labels = test_labels
Example #21
    def maxTurbulenceSize(self, A):
        N = len(A)
        ans = 1
        anchor = 0

        for i in xrange(1, N):
            c = cmp(A[i-1], A[i])
            if c == 0:
                anchor = i
            elif i == N-1 or c * cmp(A[i], A[i+1]) != -1:
                ans = max(ans, i - anchor + 1)
                anchor = i
        return ans
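The built-in cmp used above exists only in Python 2; the standard replacement on Python 3 is the sign trick (a small sketch to pair with the method):

def cmp(a, b):
    # returns -1, 0, or 1, matching Python 2's built-in cmp
    return (a > b) - (a < b)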
Example #22
 def train(self):
     # """
     #   Train unigram and bigram
     # """
     for sentence in self.sentences:
         sentence.insert(0, '<s>')
         sentence.append('</s>')
         for i in xrange(len(sentence) - 1):
             token1 = sentence[i]
             token2 = sentence[i + 1]
             self.laplaceUnigramCounts[token1] += 1
             self.laplaceBigramCounts[(token1, token2)] += 1
             self.total += 1
         self.total += 1
         self.laplaceUnigramCounts[sentence[-1]] += 1
Example #23
def cpf_funcional():
    n = [random.randrange(10) for i in xrange(9)]

    # compute check digit 1 and append it to the number
    s = sum(x * y for x, y in zip(n, range(10, 1, -1)))
    d1 = 11 - s % 11
    if d1 >= 10:
        d1 = 0
    n.append(d1)

    # compute check digit 2 and append it to the number
    s = sum(x * y for x, y in zip(n, range(11, 1, -1)))
    d2 = 11 - s % 11
    if d2 >= 10:
        d2 = 0
    n.append(d2)

    return "%d%d%d.%d%d%d.%d%d%d-%d%d" % tuple(n)
    def write_reverse_binary(self, input_file_path: str, output_file_path: str,
                             buffer_size: int):
        """
        Write reverse binary from input file to output file

        :param input_file_path: the input file path
        :param output_file_path: the output file path
        :param buffer_size: buffer chunk size, given as a power-of-two exponent.
        """
        self.__check_buffer_size(buffer_size)
        buffer_size = 1 << buffer_size

        with open(input_file_path, 'rb') as file, \
                open(output_file_path, 'wb') as file_out:
            file.seek(0, os.SEEK_END)
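            # walk chunk start offsets backwards from the end of the file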
            for cursor_position in reversed(xrange(0, file.tell(),
                                                   buffer_size)):
                file.seek(cursor_position, os.SEEK_SET)
                file_out.write(file.read(buffer_size)[::-1])
Example #25
def predict(featureFile, modelFile, testText):

    pplCount, features = load_feature_words(featureFile)
    pplCount = list(map(int, pplCount))
    pplScores = [math.log(count * 1.0 / sum(pplCount)) for count in pplCount]
    scores = load_model(modelFile)
    rCount = 0
    totCount = 0
    lable = []
    for line in testText:
        lable = line[0]
        fileCategory = line[1]
        index = lable2id(lable)
        preValues = list(pplScores)
        if fileCategory in features:
            for i in xrange(len(preValues)):
                preValues[i] += math.log(scores[fileCategory][i])
        m = max(preValues)
        pIndex = preValues.index(m)
        if pIndex == index:
            rCount += 1
        totCount += 1
    # report accuracy; the counters were otherwise unused
    print("Accuracy : ", rCount * 1.0 / totCount)
Example #26
    def _generate_every_dimension_combination(self):
        """Generates every combination of dimension values.

    Example: for two dimensions, with two possible values each:
    [(UK, MALE),
     (UK, FEMALE),
     (US, MALE),
     (US, FEMALE)]

    Returns:
      A generator for every combination of dimension values.
    """
        # Edge case: dimensions were defined but the report is empty. The dimension
        # values need to be flattened:
        number_of_values = len(
            list(itertools.chain(*self.dimension_values.values())))

        if self.dimension_values.keys() and not number_of_values:
            comb = ([] for _ in xrange(len(self.dimension_values)))
            return comb

        # Normal case:
        return itertools.product(*self.dimension_values.values())
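The normal case is a plain itertools.product over the dimension values; a quick demo matching the docstring's example (dict order is insertion order on Python 3.7+):

import itertools

dims = {'country': ['UK', 'US'], 'gender': ['MALE', 'FEMALE']}
print(list(itertools.product(*dims.values())))
# [('UK', 'MALE'), ('UK', 'FEMALE'), ('US', 'MALE'), ('US', 'FEMALE')]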
Example #27
def train_bayes(trainText, featureFile):
    '''
        Train the naive Bayes model:
        count how often each feature word appears in each class.
    '''
    pplCount, features = load_feature_words(featureFile)
    cateCount = collections.defaultdict(doc_dict)
    tCount = [0] * len(pplCount)
    for line in trainText:
        lable = line[0]
        fileCategory = line[1]
        index = lable2id(lable)
        if fileCategory in features:
            tCount[index] += 1
            cateCount[fileCategory][index] += 1

    with open('Rec_modelFile.csv', 'w') as csvfile:
        outModel = csv.writer(csvfile)
        # Laplace smoothing
        for k, v in cateCount.items():
            scores = [(v[i] + 1) * 1.0 / (tCount[i] + len(cateCount))
                      for i in xrange(len(v))]
            outModel.writerow([k, scores])
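The add-one smoothing above keeps every probability nonzero, which matters because predict later takes math.log of these scores. A tiny illustration with made-up counts:

counts = [0, 3, 1]     # occurrences of one feature in each class (illustrative)
totals = [10, 20, 15]  # total feature occurrences per class
V = 4                  # number of distinct features
probs = [(c + 1.0) / (t + V) for c, t in zip(counts, totals)]
print(probs)  # no zeros, so math.log is always defined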
Example #28
def predict(featureFile, modelFile, testText):
    '''
        Predict the class label for each document; each input line is one document.
    '''
    docCounts, features = load_feature_words(featureFile)
    docCounts = list(map(int, docCounts))
    docScores = [math.log(count * 1.0 / sum(docCounts)) for count in docCounts]
    scores = load_model(modelFile)

    lable = []
    for line in testText:
        text = line.strip()
        words = text.split(' ')
        preValues = list(docScores)
        for word in words:
            if word in features:
                for i in xrange(len(preValues)):
                    preValues[i] += math.log(scores[word][i])
        m = max(preValues)
        pIndex = preValues.index(m)
        lable.append('C' + str(pIndex))
    # print(lable)
    return lable
Example #29
def train_bayes(trainText, featureFile):
    '''
        Train the naive Bayes model:
        count how often each feature word appears in each class.
    '''
    docCounts, features = load_feature_words(featureFile)
    wordCount = collections.defaultdict(doc_dict)
    tCount = [0] * len(docCounts)
    for line in trainText:
        lable, text = line.strip().split(' ', 1)
        index = lable2id(lable)
        words = text.split(' ')
        for word in words:
            if word in features:
                tCount[index] += 1
                wordCount[word][index] += 1

    with open('modelFile.csv', 'w') as csvfile:
        outModel = csv.writer(csvfile)
        # Laplace smoothing
        for k, v in wordCount.items():
            scores = [(v[i] + 1) * 1.0 / (tCount[i] + len(wordCount))
                      for i in xrange(len(v))]
            outModel.writerow([k, scores])
Example #30
def lable2id(lable):
    for i in xrange(len(lables)):
        if lable == lables[i]:
            return i
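list.index performs the same linear search, but raises ValueError for an unknown label instead of silently returning None (a sketch; `lables` is the module-level list the original assumes):

def lable2id_idiomatic(lable):
    # equivalent lookup; raises ValueError if lable is not in lables
    return lables.index(lable)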