def main():
    """Call HMM classes for the rows (or columns) of an interaction matrix.

    Relies on a name ``args`` being available from the enclosing scope:
    ``args.i`` is the input matrix file and ``args.c`` selects column mode.

    The matrix is loaded with ``mf.dekker_2_numpy_matrix``, filtered with
    ``mf.remove_NA_zeros``, optionally transposed, z-scored along axis 1 and
    used to fit a 5-component Gaussian HMM. One tab-separated
    ``header<TAB>class`` line per entry is written to
    ``even_compartments.tab`` (column mode) or ``odd_compartments.tab``.
    """
    X, xheader, yheader = mf.dekker_2_numpy_matrix(args.i)
    print("Matrix shape: " + str(np.shape(X)))

    # Step 1
    #  Rows and columns for which more than 30% of the entries were either
    #  undefined or zeros were removed from the matrix.
    #  NOTE(review): the text above says 30% but the threshold passed below
    #  is 0.65 -- confirm the meaning of that argument in mf.remove_NA_zeros.
    print("Removing NAs...")
    X_step1, xheader, yheader = mf.remove_NA_zeros(X, xheader, yheader, 0.65)
    print("Matrix shape: " + str(np.shape(X_step1)))

    # Step 2
    # In column mode the matrix is transposed so that the entities being
    # classified always lie along axis 0.
    if args.c:
        print("Transposing matrix...")
        X_step2 = np.transpose(X_step1)
    else:
        X_step2 = X_step1

    # Step 3
    # Transform matrix to zscores by row
    print("Transforming rows to zscores...")
    X_step3 = stats.zscore(X_step2, axis=1)

    # Step 4
    # Train hmm model and predict one class per row of X_step3
    print("Training HMM")
    model = hmm.GaussianHMM(n_components=5, covariance_type="diag", n_iter=1000)
    model.fit(X_step3)
    print("HMM output")
    classes = model.predict(X_step3)

    # Column mode labels the classes with xheader, row mode with yheader.
    if args.c:
        axis_name, labels, out_path = "columns", xheader, "even_compartments.tab"
    else:
        axis_name, labels, out_path = "rows", yheader, "odd_compartments.tab"

    # Both counts are printed so a header/class length mismatch is visible.
    print("# of " + axis_name + ": " + str(len(labels)))
    print("# of class entries: " + str(len(classes)))

    # The context manager closes the file even if a write raises.
    with open(out_path, "w") as out:
        for i, label in enumerate(labels):
            out.write(label + "\t" + str(classes[i]) + "\n")
def main():
    """Write the mean contact value for every odd/even class pair.

    Relies on a name ``args`` being available from the enclosing scope:
    ``args.i`` is the interaction matrix file, ``args.o`` the odd class
    calls and ``args.e`` the even class calls.

    The matrix is loaded with ``mf.dekker_2_numpy_matrix`` and filtered with
    ``mf.remove_NA_zeros``. Each class file is read as whitespace-separated
    lines whose second field is an integer class; line k of the odd file is
    paired with matrix row k and line k of the even file with column k.
    The 5 x 5 table of per-pair means is saved tab-separated to
    ``contact_enrichment.tab``. A pair with no entries yields ``nan``.

    Exits with status 1 when the class counts do not match the matrix
    shape. Raises ValueError when a class label is outside 0..4.
    """
    import sys  # local import: only needed for the non-zero error exit

    n_classes = 5  # the enrichment table is a fixed 5 x 5 grid

    def read_classes(path):
        """Return the integer second field of every non-blank line."""
        with open(path, 'r') as handle:
            return [int(line.split()[1]) for line in handle if line.strip()]

    X, xheader, yheader = mf.dekker_2_numpy_matrix(args.i)
    print('Matrix shape: ' + str(np.shape(X)))

    # Step 1
    #  Rows and columns for which more than 30% of the entries were either
    #  undefined or zeros were removed from the matrix.
    #  NOTE(review): the text above says 30% but the threshold passed below
    #  is .65 -- confirm the meaning of that argument in mf.remove_NA_zeros.
    print('Removing NAs...')
    X_step1, xheader, yheader = mf.remove_NA_zeros(X, xheader, yheader, .65)
    print('Matrix shape: ' + str(np.shape(X_step1)))

    # Calculate contact enrichment
    odd = np.array(read_classes(args.o), dtype=int)
    even = np.array(read_classes(args.e), dtype=int)
    values = np.asarray(X_step1)
    n_rows, n_cols = values.shape

    # Size check
    if len(odd) != n_rows or len(even) != n_cols:
        print('ERROR: size mismatches exist')
        # quit() is an interactive helper and reports success (status 0).
        sys.exit(1)

    # Reject labels that do not fit the table instead of failing mid-way
    # (too large) or silently wrapping around (negative).
    for source, labels in (('odd', odd), ('even', even)):
        if labels.size and (labels.min() < 0 or labels.max() >= n_classes):
            raise ValueError('%s class labels must be in 0..%d'
                             % (source, n_classes - 1))

    # Row/column positions belonging to each class, computed once.
    rows_of = [np.flatnonzero(odd == c) for c in range(n_classes)]
    cols_of = [np.flatnonzero(even == c) for c in range(n_classes)]

    contact_enrich = np.empty((n_classes, n_classes), dtype=float)
    for odd_class in range(n_classes):
        for even_class in range(n_classes):
            # Sub-matrix of all entries whose row is in odd_class and whose
            # column is in even_class, flattened in row-major order.
            block = values[np.ix_(rows_of[odd_class], cols_of[even_class])]
            contact_enrich[odd_class, even_class] = np.mean(block.ravel())

    np.savetxt('contact_enrichment.tab', contact_enrich, delimiter='\t', fmt='%1.2f')
def main():
    """Write the mean contact value for every odd/even class pair.

    Command-line options: ``-i`` interaction matrix file, ``-o`` odd
    chromosome class calls, ``-e`` even chromosome class calls (all
    required).

    The matrix is loaded with ``mf.dekker_2_numpy_matrix`` and filtered with
    ``mf.remove_NA_zeros``. Each class file is read as whitespace-separated
    lines whose second field is an integer class; line k of the odd file is
    paired with matrix row k and line k of the even file with column k.
    The 5 x 5 table of per-pair means is saved tab-separated to
    ``contact_enrichment.tab``. A pair with no entries yields ``nan``.

    Exits with status 1 when the class counts do not match the matrix
    shape. Raises ValueError when a class label is outside 0..4.
    """
    import sys  # local import: only needed for the non-zero error exit

    n_classes = 5  # the enrichment table is a fixed 5 x 5 grid

    def read_classes(path):
        """Return the integer second field of every non-blank line."""
        with open(path, 'r') as handle:
            return [int(line.split()[1]) for line in handle if line.strip()]

    parser = argparse.ArgumentParser(
        description=
        'Calculate contact enrichment to merge odd/even subcompartment calls')
    parser.add_argument('-i',
                        help='input interaction matrix file',
                        type=str,
                        required=True)
    parser.add_argument(
        '-o',
        help='odd chrom subcompartment calls (ex. odd_compartments.tab)',
        type=str,
        required=True)
    parser.add_argument(
        '-e',
        help='even chrom subcompartment calls (ex. even_compartments.tab)',
        type=str,
        required=True)
    args = parser.parse_args()

    X, xheader, yheader = mf.dekker_2_numpy_matrix(args.i)
    print('Matrix shape: ' + str(np.shape(X)))

    # Step 1
    #  Rows and columns for which more than 30% of the entries were either
    #  undefined or zeros were removed from the matrix.
    #  NOTE(review): the text above says 30% but the threshold passed below
    #  is .65 -- confirm the meaning of that argument in mf.remove_NA_zeros.
    print('Removing NAs...')
    X_step1, xheader, yheader = mf.remove_NA_zeros(X, xheader, yheader, .65)
    print('Matrix shape: ' + str(np.shape(X_step1)))

    # Calculate contact enrichment
    odd = np.array(read_classes(args.o), dtype=int)
    even = np.array(read_classes(args.e), dtype=int)
    values = np.asarray(X_step1)
    n_rows, n_cols = values.shape

    # Size check
    if len(odd) != n_rows or len(even) != n_cols:
        print('ERROR: size mismatches exist')
        # quit() is an interactive helper and reports success (status 0).
        sys.exit(1)

    # Reject labels that do not fit the table instead of failing mid-way
    # (too large) or silently wrapping around (negative).
    for source, labels in (('odd', odd), ('even', even)):
        if labels.size and (labels.min() < 0 or labels.max() >= n_classes):
            raise ValueError('%s class labels must be in 0..%d'
                             % (source, n_classes - 1))

    # Row/column positions belonging to each class, computed once.
    rows_of = [np.flatnonzero(odd == c) for c in range(n_classes)]
    cols_of = [np.flatnonzero(even == c) for c in range(n_classes)]

    contact_enrich = np.empty((n_classes, n_classes), dtype=float)
    for odd_class in range(n_classes):
        for even_class in range(n_classes):
            # Sub-matrix of all entries whose row is in odd_class and whose
            # column is in even_class, flattened in row-major order.
            block = values[np.ix_(rows_of[odd_class], cols_of[even_class])]
            contact_enrich[odd_class, even_class] = np.mean(block.ravel())

    np.savetxt('contact_enrichment.tab',
               contact_enrich,
               delimiter='\t',
               fmt='%1.2f')
def main():
    """Call HMM classes for the rows (or columns) of an interaction matrix.

    Command-line options: ``-i`` input interaction matrix file (required)
    and ``-c`` to classify columns instead of rows. ``-c`` may be given
    bare or with a value; "false", "f", "no", "n", "0" and the empty string
    (case-insensitive) mean off, any other value means on.

    The matrix is loaded with ``mf.dekker_2_numpy_matrix``, filtered with
    ``mf.remove_NA_zeros``, optionally transposed, z-scored along axis 1 and
    used to fit a 5-component Gaussian HMM. One tab-separated
    ``header<TAB>class`` line per entry is written to
    ``even_compartments.tab`` (column mode) or ``odd_compartments.tab``.
    """

    def parse_flag(text):
        """Convert a command-line string to a bool.

        ``type=bool`` maps every non-empty string -- including "False" and
        "0" -- to True, so the negative spellings are recognised explicitly.
        Any other non-empty value keeps meaning True, as it did before.
        """
        return text.lower() not in ('', '0', 'false', 'f', 'no', 'n')

    parser = argparse.ArgumentParser(
        description='Perform Rao et al. (2014) HMM subcompartment analysis')
    parser.add_argument('-i',
                        help='input interaction matrix file',
                        type=str,
                        required=True)
    # nargs='?' with const=True lets a bare "-c" switch column mode on while
    # still accepting the historical "-c True" / "-c False" form.
    parser.add_argument('-c',
                        help='run HMM on columns/even chromosomes)',
                        type=parse_flag,
                        nargs='?',
                        const=True,
                        default=False)

    args = parser.parse_args()

    X, xheader, yheader = mf.dekker_2_numpy_matrix(args.i)
    print('Matrix shape: ' + str(np.shape(X)))

    # Step 1
    #  Rows and columns for which more than 30% of the entries were either
    #  undefined or zeros were removed from the matrix.
    #  NOTE(review): the text above says 30% but the threshold passed below
    #  is .65 -- confirm the meaning of that argument in mf.remove_NA_zeros.
    print('Removing NAs...')
    X_step1, xheader, yheader = mf.remove_NA_zeros(X, xheader, yheader, .65)
    print('Matrix shape: ' + str(np.shape(X_step1)))

    # Step 2
    # In column mode the matrix is transposed so that the entities being
    # classified always lie along axis 0.
    if args.c:
        print('Transposing matrix...')
        X_step2 = np.transpose(X_step1)
    else:
        X_step2 = X_step1

    # Step 3
    # Transform matrix to zscores by row
    print('Transforming rows to zscores...')
    X_step3 = stats.zscore(X_step2, axis=1)

    # Step 4
    # Train hmm model and predict one class per row of X_step3
    print('Training HMM')
    model = hmm.GaussianHMM(n_components=5,
                            covariance_type='diag',
                            n_iter=1000)
    model.fit(X_step3)
    print('HMM output')
    classes = model.predict(X_step3)

    # Column mode labels the classes with xheader, row mode with yheader.
    if args.c:
        axis_name, labels, out_path = 'columns', xheader, 'even_compartments.tab'
    else:
        axis_name, labels, out_path = 'rows', yheader, 'odd_compartments.tab'

    # Both counts are printed so a header/class length mismatch is visible.
    print('# of ' + axis_name + ': ' + str(len(labels)))
    print('# of class entries: ' + str(len(classes)))

    # The context manager closes the file even if a write raises.
    with open(out_path, 'w') as out:
        for i, label in enumerate(labels):
            out.write(label + '\t' + str(classes[i]) + '\n')